Permalink
Cannot retrieve contributors at this time
232 lines (202 sloc)
7.56 KB
"""Filename globbing utility.""" | |
import os | |
import re | |
import fnmatch | |
import itertools | |
import stat | |
import sys | |
__all__ = ["glob", "iglob", "escape"] | |
def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False): | |
"""Return a list of paths matching a pathname pattern. | |
The pattern may contain simple shell-style wildcards a la | |
fnmatch. However, unlike fnmatch, filenames starting with a | |
dot are special cases that are not matched by '*' and '?' | |
patterns. | |
If recursive is true, the pattern '**' will match any files and | |
zero or more directories and subdirectories. | |
""" | |
return list(iglob(pathname, root_dir=root_dir, dir_fd=dir_fd, recursive=recursive)) | |
def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False): | |
"""Return an iterator which yields the paths matching a pathname pattern. | |
The pattern may contain simple shell-style wildcards a la | |
fnmatch. However, unlike fnmatch, filenames starting with a | |
dot are special cases that are not matched by '*' and '?' | |
patterns. | |
If recursive is true, the pattern '**' will match any files and | |
zero or more directories and subdirectories. | |
""" | |
sys.audit("glob.glob", pathname, recursive) | |
sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd) | |
if root_dir is not None: | |
root_dir = os.fspath(root_dir) | |
else: | |
root_dir = pathname[:0] | |
it = _iglob(pathname, root_dir, dir_fd, recursive, False) | |
if not pathname or recursive and _isrecursive(pathname[:2]): | |
try: | |
s = next(it) # skip empty string | |
if s: | |
it = itertools.chain((s,), it) | |
except StopIteration: | |
pass | |
return it | |
def _iglob(pathname, root_dir, dir_fd, recursive, dironly): | |
dirname, basename = os.path.split(pathname) | |
if not has_magic(pathname): | |
assert not dironly | |
if basename: | |
if _lexists(_join(root_dir, pathname), dir_fd): | |
yield pathname | |
else: | |
# Patterns ending with a slash should match only directories | |
if _isdir(_join(root_dir, dirname), dir_fd): | |
yield pathname | |
return | |
if not dirname: | |
if recursive and _isrecursive(basename): | |
yield from _glob2(root_dir, basename, dir_fd, dironly) | |
else: | |
yield from _glob1(root_dir, basename, dir_fd, dironly) | |
return | |
# `os.path.split()` returns the argument itself as a dirname if it is a | |
# drive or UNC path. Prevent an infinite recursion if a drive or UNC path | |
# contains magic characters (i.e. r'\\?\C:'). | |
if dirname != pathname and has_magic(dirname): | |
dirs = _iglob(dirname, root_dir, dir_fd, recursive, True) | |
else: | |
dirs = [dirname] | |
if has_magic(basename): | |
if recursive and _isrecursive(basename): | |
glob_in_dir = _glob2 | |
else: | |
glob_in_dir = _glob1 | |
else: | |
glob_in_dir = _glob0 | |
for dirname in dirs: | |
for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly): | |
yield os.path.join(dirname, name) | |
# These 2 helper functions non-recursively glob inside a literal directory. | |
# They return a list of basenames. _glob1 accepts a pattern while _glob0 | |
# takes a literal basename (so it only has to check for its existence). | |
def _glob1(dirname, pattern, dir_fd, dironly): | |
names = list(_iterdir(dirname, dir_fd, dironly)) | |
if not _ishidden(pattern): | |
names = (x for x in names if not _ishidden(x)) | |
return fnmatch.filter(names, pattern) | |
def _glob0(dirname, basename, dir_fd, dironly): | |
if basename: | |
if _lexists(_join(dirname, basename), dir_fd): | |
return [basename] | |
else: | |
# `os.path.split()` returns an empty basename for paths ending with a | |
# directory separator. 'q*x/' should match only directories. | |
if _isdir(dirname, dir_fd): | |
return [basename] | |
return [] | |
# Following functions are not public but can be used by third-party code. | |
def glob0(dirname, pattern): | |
return _glob0(dirname, pattern, None, False) | |
def glob1(dirname, pattern): | |
return _glob1(dirname, pattern, None, False) | |
# This helper function recursively yields relative pathnames inside a literal | |
# directory. | |
def _glob2(dirname, pattern, dir_fd, dironly): | |
assert _isrecursive(pattern) | |
yield pattern[:0] | |
yield from _rlistdir(dirname, dir_fd, dironly) | |
# If dironly is false, yields all file names inside a directory. | |
# If dironly is true, yields only directory names. | |
def _iterdir(dirname, dir_fd, dironly): | |
try: | |
fd = None | |
fsencode = None | |
if dir_fd is not None: | |
if dirname: | |
fd = arg = os.open(dirname, _dir_open_flags, dir_fd=dir_fd) | |
else: | |
arg = dir_fd | |
if isinstance(dirname, bytes): | |
fsencode = os.fsencode | |
elif dirname: | |
arg = dirname | |
elif isinstance(dirname, bytes): | |
arg = bytes(os.curdir, 'ASCII') | |
else: | |
arg = os.curdir | |
try: | |
with os.scandir(arg) as it: | |
for entry in it: | |
try: | |
if not dironly or entry.is_dir(): | |
if fsencode is not None: | |
yield fsencode(entry.name) | |
else: | |
yield entry.name | |
except OSError: | |
pass | |
finally: | |
if fd is not None: | |
os.close(fd) | |
except OSError: | |
return | |
# Recursively yields relative pathnames inside a literal directory. | |
def _rlistdir(dirname, dir_fd, dironly): | |
names = list(_iterdir(dirname, dir_fd, dironly)) | |
for x in names: | |
if not _ishidden(x): | |
yield x | |
path = _join(dirname, x) if dirname else x | |
for y in _rlistdir(path, dir_fd, dironly): | |
yield _join(x, y) | |
def _lexists(pathname, dir_fd): | |
# Same as os.path.lexists(), but with dir_fd | |
if dir_fd is None: | |
return os.path.lexists(pathname) | |
try: | |
os.lstat(pathname, dir_fd=dir_fd) | |
except (OSError, ValueError): | |
return False | |
else: | |
return True | |
def _isdir(pathname, dir_fd): | |
# Same as os.path.isdir(), but with dir_fd | |
if dir_fd is None: | |
return os.path.isdir(pathname) | |
try: | |
st = os.stat(pathname, dir_fd=dir_fd) | |
except (OSError, ValueError): | |
return False | |
else: | |
return stat.S_ISDIR(st.st_mode) | |
def _join(dirname, basename): | |
# It is common if dirname or basename is empty | |
if not dirname or not basename: | |
return dirname or basename | |
return os.path.join(dirname, basename) | |
magic_check = re.compile('([*?[])') | |
magic_check_bytes = re.compile(b'([*?[])') | |
def has_magic(s): | |
if isinstance(s, bytes): | |
match = magic_check_bytes.search(s) | |
else: | |
match = magic_check.search(s) | |
return match is not None | |
def _ishidden(path): | |
return path[0] in ('.', b'.'[0]) | |
def _isrecursive(pattern): | |
if isinstance(pattern, bytes): | |
return pattern == b'**' | |
else: | |
return pattern == '**' | |
def escape(pathname): | |
"""Escape all special characters. | |
""" | |
# Escaping is done by wrapping any of "*?[" between square brackets. | |
# Metacharacters do not work in the drive part and shouldn't be escaped. | |
drive, pathname = os.path.splitdrive(pathname) | |
if isinstance(pathname, bytes): | |
pathname = magic_check_bytes.sub(br'[\1]', pathname) | |
else: | |
pathname = magic_check.sub(r'[\1]', pathname) | |
return drive + pathname | |
_dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0) |