| """ |
| Basic glob match implementation for starlark based on the golang [doublestar](https://github.com/bmatcuk/doublestar/blob/465a339d8daa03b8620e49b8ae541f71651426ad/match.go#L74) library. |
| |
| This was originally developed by @jbedard for use in rules_js |
| (https://github.com/aspect-build/rules_js/blob/6ca32d5199ddc0bf19bd704f591030dc1468ca5f/npm/private/pkg_glob.bzl) |
| to support the pnpm public-hoist-expr option (https://pnpm.io/npmrc#public-hoist-expr). The pnpm |
| implementation and tests were used as a reference implementation: |
| https://github.com/pnpm/pnpm/blob/v7.4.0-2/packages/matcher/src/index.ts |
| https://github.com/pnpm/pnpm/blob/v7.4.0-2/packages/matcher/test/index.ts |
| """ |
| |
# Stand-in for `while True`: an effectively endless range (2^30 iterations)
# that loops iterate over and leave via `break` / `return`.
_FOREVER = range(1 << 30)
| |
def _validate_glob(expr):
    """Fail if any `**` in the expression is not a whole path segment.

    Every `**` pair must be at the start of the expression or directly after
    a `/`, and must be at the end of the expression or directly before a `/`.

    Args:
        expr: the glob expression to validate
    """
    last = len(expr) - 1
    for pos in range(last):
        # Only positions where a `**` pair begins are of interest.
        if expr[pos:pos + 2] != "**":
            continue

        if pos > 0 and expr[pos - 1] != "/":
            fail("glob_match: `**` globstar in expression `{}` must be at the start of the expression or preceeded by `/`".format(expr))

        if pos + 2 <= last and expr[pos + 2] != "/":
            fail("glob_match: `**` globstar in expression `{}` must be at the end of the expression or followed by `/`".format(expr))
| |
def is_glob(expr):
    """Determine if the passed string is a glob expression.

    Args:
        expr: the potential glob expression

    Returns:
        True if the passed string contains a glob wildcard (`*` or `?`)
    """

    return "*" in expr or "?" in expr
| |
def glob_match(expr, path, match_path_separator = False):
    """Test if the passed path matches the glob expression.

    `*` A single asterisk stands for zero or more arbitrary characters except for the path separator `/` if `match_path_separator` is False

    `?` The question mark stands for exactly one character except for the path separator `/` if `match_path_separator` is False

    `**` A double asterisk stands for an arbitrary sequence of 0 or more characters. It is only allowed when preceded by either the beginning of the string or a slash. Likewise it must be followed by a slash or the end of the pattern.

    Fails the evaluation if `expr` is empty or contains a `**` that is not a
    whole path segment.

    Args:
        expr: the glob expression
        path: the path against which to match the glob expression
        match_path_separator: whether or not to match the path separator '/' when matching `*` and `?` expressions

    Returns:
        True if the path matches the glob expression
    """

    # See https://github.com/bmatcuk/doublestar/blob/465a339d8daa03b8620e49b8ae541f71651426ad/match.go#L74
    # for reference implementation.

    if expr == "":
        fail("glob_match: invalid empty glob expression")

    if expr == "**":
        # matches everything
        return True

    if not is_glob(expr):
        # the expression is not a glob (does not have any glob symbols) so the only match is an exact match
        return expr == path

    _validate_glob(expr)

    # Cursor of the latest '**' expression within the path
    doublestar_expr_backtrack = -1
    doublestar_path_backtrack = -1

    # Cursor of the latest '*' expression within the path
    star_expr_backtrack = -1
    star_path_backtrack = -1

    # Current indexes into path and expression
    expr_i = 0
    expr_len = len(expr)
    path_i = 0
    path_len = len(path)

    start_of_segment = True

    for _ in _FOREVER:
        if path_i >= path_len:
            break

        # Potentially advance the expression
        if expr_i < expr_len:
            # star
            if expr[expr_i] == "*":
                # Advance past the *
                expr_i += 1

                # doublestar
                if expr_i < expr_len and expr[expr_i] == "*":
                    # Assert unsupported ** expressions were prevented by _validate_glob()
                    if not start_of_segment or (expr_i + 1 < expr_len and expr[expr_i + 1] != "/"):
                        fail("glob_match: invalid '**' should be prevented by _validate_glob()")

                    # Advance past the **
                    expr_i += 1

                    # Trailing /** matches everything
                    if expr_i >= expr_len:
                        return True

                    # Advance past the **/
                    expr_i += 1

                    # Start the doublestar cursor; any earlier '*' cursor is
                    # superseded by it.
                    doublestar_expr_backtrack = expr_i
                    doublestar_path_backtrack = path_i
                    star_expr_backtrack = -1
                    star_path_backtrack = -1
                    continue
                else:
                    # Start the star expression cursor
                    start_of_segment = False
                    star_expr_backtrack = expr_i
                    star_path_backtrack = path_i
                    continue

            elif expr[expr_i] == "?":
                start_of_segment = False
                if match_path_separator or path[path_i] != "/":
                    expr_i += 1
                    path_i += 1
                    continue

                # `?` cannot consume the path separator. Do NOT give up here:
                # fall through to the backtracking below so that an earlier
                # `**` (or `*`) gets a chance to consume more of the path,
                # e.g. `**/a?b` must match `a/a/axb`.

            elif path_i < path_len and expr[expr_i] == path[path_i]:
                # Literal character match; a matched `/` begins a new segment.
                start_of_segment = path[path_i] == "/"
                expr_i += 1
                path_i += 1
                continue

        # Did not advance the expression or path.
        # Advance any star expression if possible: let the latest `*` consume
        # one more path character (never a `/` unless match_path_separator).
        if star_expr_backtrack >= 0 and (match_path_separator or path[star_path_backtrack] != "/"):
            star_path_backtrack += 1
            expr_i = star_expr_backtrack
            path_i = star_path_backtrack
            start_of_segment = False
            continue

        # Advance any double star expression if possible.
        if doublestar_expr_backtrack >= 0:
            super_continue = False

            # ** backtrack, advance path_i past next separator
            path_i = doublestar_path_backtrack
            for _ in _FOREVER:
                if path_i >= path_len:
                    break

                path_current = path[path_i]
                path_i += 1

                if path_current == "/":
                    doublestar_path_backtrack = path_i
                    expr_i = doublestar_expr_backtrack
                    start_of_segment = True
                    super_continue = True
                    break

            # Successfully consumed a path segment
            if super_continue:
                continue

        # Failed to advance the path or expression
        return False

    # Exited the loop without reaching the end
    if path_i < path_len:
        return False

    # Reached the end of the path, check if the expression ended or is on a final wildcard
    trailing = expr[expr_i:]
    return trailing == "" or trailing == "*" or trailing == "**"