File: //usr/lib/python3/dist-packages/duplicity/globmatch.py
# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4; encoding:utf8 -*-
#
# Copyright 2002 Ben Escoto <ben@emerose.org>
# Copyright 2007 Kenneth Loafman <kenneth@loafman.com>
# Copyright 2014 Aaron Whitehouse <aaron@whitehouse.kiwi.nz>
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# All functions in this module only accept unicode. Any byte strings should
# be converted to unicode before sending them into this.
import re
from builtins import map
from builtins import range
from builtins import str
class GlobbingError(Exception):
    u"""Raised when a glob string cannot be parsed."""
class FilePrefixError(GlobbingError):
    u"""Raised when a specified file does not start with the correct prefix."""
def _glob_get_prefix_regexs(glob_str):
    u"""Return a list of regexps, one for each leading prefix of glob_str.

    glob_str is split on "/" and every cumulative prefix (first component,
    first two components, ...) is translated with glob_to_regex.  An empty
    first component, as produced by a glob starting with "/", is translated
    as the root directory "/".

    Raises GlobbingError if glob_str contains consecutive "/" characters
    anywhere other than as a single leading or trailing slash.
    """
    # Internal. Used by select_fn_from_glob.
    components = glob_str.split(u"/")

    # An empty component is only acceptable first or last, as in /foo/
    if any(component == u"" for component in components[1:-1]):
        raise GlobbingError(u"Consecutive '/'s found in globbing string " +
                            glob_str)

    prefix_regexs = []
    for count in range(1, len(components) + 1):
        prefix = u"/".join(components[:count])
        if count == 1 and prefix == u"":
            # Root "/" is the only directory whose name ends in a slash
            prefix = u"/"
        prefix_regexs.append(glob_to_regex(prefix))
    return prefix_regexs
def select_fn_from_glob(glob_str, include, ignore_case=False):
    u"""Build a selection function test_fn(path) for a single glob string.

    glob_str is a unicode glob interpreted as per the Unix shell rules.  A
    trailing "/" means that an exact match only counts if the path is a
    directory; the glob "/" on its own is treated as "/**".
    include is 1 if glob_str is an include glob and 0 if it is an exclude
    glob.  If ignore_case is true, matching is case-insensitive.

    The returned test_fn(path) reads path.uc_name (and may call
    path.isdir()) and returns:
    include - if path matches the glob or lies within a matching folder,
              i.e. 0 if the file should be excluded and 1 if it should be
              included
    2 - if include is 1 and path matches one of the leading directory
        prefixes of the glob (cut off after the first "**"), so the folder
        should be scanned for any included files
    None - if the selection function has nothing to say about the file

    Note: matching a folder implicitly matches everything within it.
    """
    assert isinstance(glob_str, str)

    # A trailing "/" restricts exact matches to directories.  The root glob
    # "/" alone is not such a restriction: it implicitly covers everything.
    glob_ends_w_slash = glob_str != u"/" and glob_str[-1] == u"/"
    if glob_str == u"/":
        glob_str = u"/**"
    elif glob_ends_w_slash:
        # Drop the trailing "/" from the directory name
        glob_str = glob_str[:-1]

    re_flags = (re.S | re.IGNORECASE) if ignore_case else re.S

    def compile_re(pattern):
        return re.compile(pattern, re_flags)

    glob_regex = glob_to_regex(glob_str)

    # Matches whatever the glob matches plus anything below it:
    #   ^      anchor at the beginning of the path
    #   %s     the glob translated into a regex
    #   ($|/)  followed only by the end of the string, a newline, or "/"
    # The "/" alternative is what makes a matching folder cover its contents.
    glob_comp_re = compile_re(u"^%s($|/)" % glob_regex)

    # Variant without the "/" alternative, so it does not match folder
    # contents.  Only needed to check that an exact match is really a folder
    # when the glob ended with "/".
    glob_comp_re_exact = None
    if glob_ends_w_slash:
        glob_comp_re_exact = compile_re(u"^%s($)" % glob_regex)

    # For the scan regex, keep the glob only up to and including its first **
    star_pos = glob_str.find(u"**")
    scan_glob = glob_str if star_pos == -1 else glob_str[:star_pos + 2]

    # Matches the parent directories of the glob:
    #   ^     anchor at the beginning of the path
    #   (..)  any one of the regexs for the leading prefixes of the glob
    #   $     followed only by the end of the string or a newline
    scan_comp_re = compile_re(
        u"^(%s)$" % u"|".join(_glob_get_prefix_regexs(scan_glob)))

    def test_fn(path):
        assert path.uc_name == u"/" or path.uc_name[-1] != u"/", \
            u"path.name should never end in '/' during normal operation for " \
            u"normal paths (except '/' alone)\n" \
            u"path.name here is " + path.uc_name + u" and glob is " + scan_glob

        if glob_comp_re.match(path.uc_name):
            # Path matches the glob or sits inside a matching folder.  The
            # one exception: the glob ended with "/", the match is exact
            # (not merely something inside the folder) and the path is not
            # a directory.
            if (glob_ends_w_slash and
                    glob_comp_re_exact.match(path.uc_name) and
                    not path.isdir()):
                return None
            return include

        if include == 1 and scan_comp_re.match(path.uc_name):
            return 2
        return None

    return test_fn
def glob_to_regex(pat):
    u"""Return a regular expression string equivalent to the shell glob pat.

    Only the ?, *, [] and ** expressions are treated specially:
    ** - matches anything, including "/"
    *  - matches any run of characters other than "/"
    ?  - matches a single character other than "/"
    [] - becomes a regex character class; a leading ! or ^ negates it and
         the rest of the contents is copied over with backslashes doubled.
         A "[" without a closing "]" is taken literally.
    Every other character is escaped so that it matches itself.  There is
    no way to quote the special characters.

    This function taken with minor modifications from efnmatch.py
    by Donovan Baarda.
    """
    # Internal. Used by select_fn_from_glob and _glob_get_prefix_regexs.
    assert isinstance(pat, str)

    pieces = []
    pos, length = 0, len(pat)
    while pos < length:
        # ** has to be recognised before a lone *
        if pat.startswith(u'**', pos):
            pieces.append(u'.*')
            pos += 2
            continue

        char = pat[pos]
        pos += 1
        if char == u'*':
            pieces.append(u'[^/]*')
        elif char == u'?':
            pieces.append(u'[^/]')
        elif char != u'[':
            pieces.append(re.escape(char))
        else:
            # Look for the closing bracket.  A negation marker and a "]"
            # placed directly after the opening bracket (or after the
            # marker) belong to the set and do not close it.
            scan = pos
            if scan < length and pat[scan] in u'!^':
                scan += 1
            if scan < length and pat[scan] == u']':
                scan += 1
            close = pat.find(u']', scan)

            if close == -1:
                pieces.append(u'\\[')  # interpret the [ literally
            else:
                # Deal with inside of [..]
                inner = pat[pos:close].replace(u'\\', u'\\\\')
                pos = close + 1
                if inner[0] in u'!^':
                    inner = u'^' + inner[1:]
                pieces.append(u'[' + inner + u']')
    return u''.join(pieces)