blob: fe2cac965a294b57f1d9b56c5caca7755877876e [file] [log] [blame]
# Copyright 2025 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This module is for implementing PEP508 in starlark as FeatureFlagInfo
"""
load("//python/private:enum.bzl", "enum")
load("//python/private:version.bzl", "version")
# The expression parsing and resolution for the PEP508 is below
#
# States of the character-level state machine in `tokenize`.
_STATE = enum(
    STRING = "string",
    VAR = "var",
    OP = "op",
    NONE = "none",
)

# Character classes recognized by `tokenize`.
_BRACKETS = "()"
_OPCHARS = "<>!=~"
_QUOTES = "'\""
_WSP = " \t"

# Marker variables that `marker_expr` compares as plain strings via
# `_env_expr`. Other variables are compared as versions only if their name
# ends with `_version`.
_NON_VERSION_VAR_NAMES = [
    "implementation_name",
    "os_name",
    "platform_machine",
    "platform_python_implementation",
    "platform_release",
    "platform_system",
    "sys_platform",
    "extra",
]

# Logical operator tokens dispatched on by `_parse`.
_AND = "and"
_OR = "or"
_NOT = "not"

# Optional key in the `env` dict that holds per-variable value aliases which
# `marker_expr` uses to normalize the literal side of a comparison.
_ENV_ALIASES = "_aliases"
def tokenize(marker):
    """Tokenize the input string.

    The output will have double-quoted values (i.e. the quoting will be
    normalized) and all of the whitespace outside of quoted values will be
    trimmed. A `not` token directly followed by `in` is merged into a single
    `not in` token.

    A quoted value is only terminated by the same quote character that opened
    it, so it may contain the other quote character as well as brackets,
    operator characters and whitespace.

    Args:
        marker: {type}`str` The input to tokenize.

    Returns:
        The {type}`list[str]` of recognized tokens that should be parsed.
    """
    if not marker:
        return []

    tokens = []
    token = ""
    state = _STATE.NONE

    # The quote character that opened the string currently being read.
    quote = ""
    char = ""

    # A char may need two iterations: one that only closes the previous token
    # (the `continue` below) and one that actually consumes the char. One more
    # iteration is needed to flush the last token, hence the `2 *` budget.
    for _ in range(2 * len(marker)):
        # Flush a finished token, either because the state machine went back
        # to NONE or because the input is exhausted.
        if token and (state == _STATE.NONE or not marker):
            if tokens and token == "in" and tokens[-1] == _NOT:
                tokens[-1] += " " + token
            else:
                tokens.append(token)
            token = ""

        if not marker:
            return tokens

        char = marker[0]
        if state == _STATE.STRING:
            # Inside a quoted value everything is literal until the matching
            # quote is found. This has to be checked before anything else so
            # that brackets or the other quote char do not cut the value short.
            if char == quote:
                state = _STATE.NONE
                token = '"{}"'.format(token)
            else:
                token += char
        elif (
            (state == _STATE.VAR and not char.isalnum() and char != "_") or
            (state == _STATE.OP and char not in _OPCHARS)
        ):
            # The current token has ended. This is checked before the bracket
            # handling so that e.g. `sys_platform)` does not lose the variable.
            state = _STATE.NONE
            continue  # Skip consuming the char below
        elif state == _STATE.NONE:
            # Transition from _STATE.NONE to something or stay in NONE
            if char in _BRACKETS:
                # Brackets are always single char tokens, flushed on the next
                # iteration.
                token = char
            elif char in _QUOTES:
                state = _STATE.STRING
                quote = char
            elif char.isalnum():
                state = _STATE.VAR
                token += char
            elif char in _OPCHARS:
                state = _STATE.OP
                token += char
            elif char in _WSP:
                state = _STATE.NONE
            else:
                fail("BUG: Cannot parse '{}' in {} ({})".format(char, state, marker))
        else:
            token += char

        # Consume the char
        marker = marker[1:]

    return fail("BUG: failed to process the marker in allocated cycles: {}".format(marker))
def evaluate(marker, *, env, strict = True, **kwargs):
    """Evaluate the marker against a given env.

    Args:
        marker: {type}`str` The string marker to evaluate.
        env: {type}`dict[str, str]` The environment to evaluate the marker against.
        strict: {type}`bool` A setting to not fail on missing values in the env.
        **kwargs: Extra kwargs to be passed to the expression evaluator.

    Returns:
        The {type}`bool | str` If the marker is compatible with the given env. If strict is
        `False`, then the output type is `str` which will represent the remaining
        expression that has not been evaluated.
    """
    remaining = tokenize(marker)
    expr = _new_expr(marker = marker, **kwargs)

    # Every `parse` call is expected to consume at least one token, so this
    # budget is generous; it only bounds the loop.
    budget = len(remaining) * 2
    for _ in range(budget):
        if remaining:
            remaining = expr.parse(env = env, tokens = remaining, strict = strict)
        else:
            break

    if remaining:
        fail("Could not evaluate: {}".format(marker))

    return expr.value()
# Maps operator and bracket tokens to the replacement text used by `to_string`.
_STRING_REPLACEMENTS = {
    "!=": "neq",
    "(": "_",
    ")": "_",
    "<": "lt",
    "<=": "lteq",
    "==": "eq",
    "===": "eeq",
    ">": "gt",
    ">=": "gteq",
    "not in": "not_in",
    # `~=` is the compatible release operator that is evaluated by
    # `_version_expr`; without this entry it was left as-is in the output.
    "~=": "cmp",
    # Kept for backwards compatibility, `~==` is not a valid operator.
    "~==": "cmp",
}
def to_string(marker):
    """Convert a marker into a single `_`-separated string.

    Args:
        marker: {type}`str` The marker to convert.

    Returns:
        {type}`str` the tokens of the marker joined with `_`, where tokens
        found in `_STRING_REPLACEMENTS` are substituted and all double quotes
        are removed.
    """
    parts = []
    for tok in tokenize(marker):
        parts.append(_STRING_REPLACEMENTS.get(tok, tok))

    joined = "_".join(parts)
    return joined.replace("\"", "")
def _and_fn(x, y):
    """Our custom `and` evaluation function.

    A string operand is a not yet evaluated expression (`strict = False`).
    If either operand is falsy the result is `False`. Otherwise, if both are
    strings they are combined into `"<x> and <y>"`, and if only one of them is
    a string then that string is what remains to be evaluated.
    """
    if not x or not y:
        return False

    str_type = type("")
    if type(x) != str_type:
        # x is a truthy non-string, so the result is decided by y alone.
        return y
    if type(y) != str_type:
        return x
    return "{} and {}".format(x, y)
def _or_fn(x, y):
    """Our custom `or` evaluation function.

    A string operand is a not yet evaluated expression (`strict = False`).
    When one operand is a string and the other one is truthy, the result is
    an empty string; when the other one is falsy, the string is returned
    unchanged. Two non-empty strings are combined into `"<x> or <y>"` and if
    either of two strings is empty, the result is an empty string.
    """
    str_type = type("")
    x_known = type(x) != str_type
    y_known = type(y) != str_type

    if x_known and y_known:
        return x or y
    if x_known:
        # Only y is a string.
        return "" if x else y
    if y_known:
        # Only x is a string.
        return "" if y else x

    if x and y:
        return "{} or {}".format(x, y)
    return ""
def _not_fn(x):
    """Our custom `not` evaluation function.

    A string is a not yet evaluated expression, so it is prefixed with `not`
    instead of being negated.
    """
    if type(x) != type(""):
        return not x
    return "not {}".format(x)
def _new_expr(
        *,
        marker,
        and_fn = _and_fn,
        or_fn = _or_fn,
        not_fn = _not_fn):
    """Create a new, empty expression node.

    Args:
        marker: {type}`str` the marker being evaluated, used in error messages.
        and_fn: the function used to combine two values with `and`.
        or_fn: the function used to combine two values with `or`.
        not_fn: the function used to negate a value.

    Returns:
        A struct with the `parse` and `value` functions bound to itself.
    """

    # The lambdas below refer to `self` before the assignment has finished;
    # they are only called after the struct has been created.
    # buildifier: disable=uninitialized
    self = struct(
        marker = marker,
        # The list of values and operator nodes at this nesting level.
        tree = [],
        parse = lambda **kwargs: _parse(self, **kwargs),
        value = lambda: _value(self),
        # This is a way for us to have a handle to the currently constructed
        # expression tree branch.
        current = lambda: self._current[-1] if self._current else None,
        # The stack of nested expressions; `_open_parenthesis` pushes to it
        # and `_close_parenthesis` pops from it.
        _current = [],
        _and = and_fn,
        _or = or_fn,
        _not = not_fn,
    )
    return self
def _parse(self, *, env, tokens, strict = False):
    """Consume the leading token(s) and return the ones that remain.

    Brackets and logical operators consume a single token, anything else is
    treated as a `<left> <op> <right>` triplet and consumes three tokens.
    """
    head = tokens[0]
    handlers = {
        "(": _open_parenthesis,
        ")": _close_parenthesis,
        _AND: _and_expr,
        _OR: _or_expr,
        _NOT: _not_expr,
    }

    if head in handlers:
        node = handlers[head](self)
        rest = tokens[1:]
    else:
        node = marker_expr(env = env, strict = strict, *tokens[:3])
        rest = tokens[3:]

    _append(self, node)
    return rest
def _value(self):
    """Collapse the expression tree into a single value."""
    nodes = self.tree
    if not nodes:
        # An empty marker is compatible with everything.
        return True

    for _ in range(len(nodes)):
        if len(nodes) == 1:
            return nodes[0]

        # Only `or` nodes are expected to be left at this point, because the
        # `and` and `not` nodes are resolved while the values are appended.
        if getattr(nodes[-2], "op", None) != _OR:
            break

        # Fold the right-most value into the `or` node that precedes it.
        rhs = nodes.pop()
        nodes[-1] = nodes[-1].value(rhs)

    fail("BUG: invalid state: {}".format(self.tree))
def marker_expr(left, op, right, *, env, strict = True):
    """Evaluate a marker expression

    Args:
        left: {type}`str` the env identifier or a value quoted in `"`.
        op: {type}`str` the operation to carry out.
        right: {type}`str` the env identifier or a value quoted in `"`.
        strict: {type}`bool` if false, only evaluates the values that are present
            in the environment, otherwise returns the original expression.
        env: {type}`dict[str, str]` the `env` to substitute `env` identifiers in
            the `<left> <op> <right>` expression. Note, if `env` has a key
            "_aliases", then we will do normalization so that we can ensure
            that e.g. `aarch64` evaluation in the `platform_machine` works the
            same way irrespective if the marker uses `arm64` or `aarch64` value
            in the expression.

    Returns:
        {type}`bool` if the expression evaluation result or {type}`str` if the expression
        could not be evaluated.
    """
    if not strict and left not in env and right not in env:
        return "{} {} {}".format(left, op, right)

    # One side is a quoted literal and the other one is the env identifier.
    literal_on_left = left[0] == '"'
    if literal_on_left:
        var_name, literal = right, left
    else:
        var_name, literal = left, right

    resolved = env[var_name]
    literal = literal.strip("\"")
    if _ENV_ALIASES in env:
        # On Windows, Linux, OSX different values may mean the same hardware,
        # e.g. Python on Windows returns arm64, but on Linux returns aarch64.
        # e.g. Python on Windows returns amd64, but on Linux returns x86_64.
        #
        # The following normalizes the literal value.
        literal = env.get(_ENV_ALIASES, {}).get(var_name, {}).get(literal, literal)

    # Put the operands back in the order in which they were written, the
    # order matters for e.g. `in` and `<`.
    if literal_on_left:
        left, right = literal, resolved
    else:
        left, right = resolved, literal

    if var_name in _NON_VERSION_VAR_NAMES:
        return _env_expr(left, op, right)
    if var_name.endswith("_version"):
        return _version_expr(left, op, right)

    # Do not fail here, just evaluate the expression to False.
    return False
def _env_expr(left, op, right):
    """Evaluate a string comparison expression"""

    # Equality
    if op == "==":
        return left == right
    if op == "!=":
        return left != right

    # Containment
    if op == "in":
        return left in right
    if op == "not in":
        return left not in right

    # Ordering
    if op == "<":
        return left < right
    if op == "<=":
        return left <= right
    if op == ">":
        return left > right
    if op == ">=":
        return left >= right

    return fail("unsupported op: '{}' {} '{}'".format(left, op, right))
def _version_expr(left, op, right):
    """Evaluate a version comparison expression"""
    lhs = version.parse(left)
    rhs = version.parse(right)
    if lhs == None or rhs == None:
        # Per spec, if either can't be normalized to a version, then
        # fallback to simple string comparison. Usually this is `platform_version`
        # or `platform_release`, which vary depending on platform.
        return _env_expr(left, op, right)

    # The name of the `version` function that implements each operator. The
    # function is looked up only for the operator that is actually used.
    comparators = {
        "!=": "is_ne",
        "<": "is_lt",
        "<=": "is_le",
        "==": "is_eq",
        "===": "is_eeq",
        ">": "is_gt",
        ">=": "is_ge",
        "~=": "is_compatible",
    }
    fn_name = comparators.get(op)
    if fn_name == None:
        return False  # Let's just ignore the invalid ops

    return getattr(version, fn_name)(lhs, rhs)
# Code that allows combining expressions with logical operators
def _append(self, value):
    """Add a parsed value or operator node to the innermost open expression.

    Args:
        self: the root expression.
        value: a {type}`bool` or {type}`str` value, an operator node (a struct
            with an `op` field) or `None`, in which case nothing is done.
    """
    if value == None:
        return

    target = self.current() or self
    nodes = target.tree
    op = getattr(value, "op", None)

    if op == _NOT:
        # `not` is a prefix operator, it waits for the value that follows.
        nodes.append(value)
        return

    if op == _AND or op == _OR:
        # Infix operators take the preceding value as their first operand and
        # replace it in the tree.
        value.append(nodes[-1])
        nodes[-1] = value
        return

    if not nodes:
        nodes.append(value)
        return

    last = nodes[-1]
    if hasattr(last, "append"):
        # The preceding node is an operator waiting for an operand.
        last.append(value)
    elif hasattr(nodes, "_append"):
        nodes._append(value)
    else:
        fail("Cannot evaluate '{}' in '{}', current: {}".format(value, self.marker, target))
def _open_parenthesis(self):
    """Push a nested expression that collects everything up to the matching `)`."""
    nested = _new_expr(
        marker = self.marker,
        and_fn = self._and,
        or_fn = self._or,
        not_fn = self._not,
    )
    self._current.append(nested)
def _close_parenthesis(self):
    """Pop the innermost nested expression and return its value.

    A string result (i.e. a not fully evaluated expression) is wrapped in
    parenthesis, any other result is returned as is.
    """
    inner = self._current.pop()
    result = inner.value()
    return "({})".format(result) if type(result) == type("") else result
def _not_expr(self):
    """Add an extra node into the tree to perform an 'not' operation."""

    def _append(value):
        """Append a value to the not expression node.

        This codifies `not` precedence over `and` and performs backtracking to
        evaluate any `not` statements and forward the value to the first `and`
        statement if needed.
        """
        current = self.current() or self

        # The last item in the tree is this `not` node, replace it with the
        # negated value.
        current.tree[-1] = self._not(value)

        # Backtrack through the tree, one preceding node per iteration.
        for _ in range(len(current.tree)):
            if not len(current.tree) > 1:
                break

            op = getattr(current.tree[-2], "op", None)
            if op == None:
                # The preceding item has no `op` field, nothing to resolve.
                pass
            elif op == _NOT:
                # Chained `not not`: negate again and keep backtracking.
                value = current.tree.pop()
                current.tree[-1] = self._not(value)
                continue
            elif op == _AND:
                # Hand the negated value over to the pending `and` node.
                value = current.tree.pop()
                current.tree[-1].append(value)
            elif op != _OR:
                fail("BUG: '{} not' compound is unsupported".format(current.tree[-1]))

            # Everything except the chained `not` case stops the backtracking;
            # `or` nodes are left in the tree to be resolved by `_value`.
            break

    return struct(
        op = _NOT,
        append = _append,
    )
def _and_expr(self):
    """Add an extra node into the tree to perform an 'and' operation"""

    # A single item list, so that the closure below can store the first operand.
    first_operand = [None]

    def _append(value):
        """Append a value to the and expression node.

        The first call stores the left operand. The second call evaluates the
        `and` and replaces the last item of the current tree with the result.
        All of the `not` statements will be by now evaluated and `or`
        statements need to be evaluated later.
        """
        if first_operand[0] != None:
            current = self.current() or self
            current.tree[-1] = self._and(first_operand[0], value)
        else:
            first_operand[0] = value

    return struct(
        op = _AND,
        append = _append,
        # private fields that help debugging
        _maybe_value = first_operand,
    )
def _or_expr(self):
    """Add an extra node into the tree to perform an 'or' operation"""

    # A single item list, so that the closures below can share the first operand.
    first_operand = [None]

    def _append(value):
        """Append a value to the or expression node.

        The first call stores the left operand. Any later value is just
        appended to the current tree and the `or` statements will be
        evaluated in the _value() function.
        """
        if first_operand[0] != None:
            current = self.current() or self
            current.tree.append(value)
        else:
            first_operand[0] = value

    return struct(
        op = _OR,
        value = lambda x: self._or(first_operand[0], x),
        append = _append,
        # private fields that help debugging
        _maybe_value = first_operand,
    )