blob: 4efd46690acefc0eadaee8317a533c8f1c63e9f9 [file] [log] [blame]
%shebang%
# vim: syntax=python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os
from os.path import dirname, join, basename
import subprocess
import uuid
import shutil
# NOTE: Each sentinel below is assembled from two string pieces (e.g.
# "%stage2" + "_bootstrap%") so that the template substitution never rewrites
# it. Comparing a value against its sentinel reveals, at runtime, that the
# placeholder was left unsubstituted -- which occurs when the native py_binary
# is used in external repositories. In that case we fall back to %main%, which
# Bazel's native rule does substitute.
_STAGE2_BOOTSTRAP_SENTINEL = "%stage2" + "_bootstrap%"

# runfiles-root-relative path
STAGE2_BOOTSTRAP="%stage2_bootstrap%"

# NOTE: Falling back from the stage2 bootstrap to main is only a courtesy for
# an older, unsupported, configuration. It can be removed once that case is
# unlikely to be a concern anymore.
# See https://github.com/bazel-contrib/rules_python/pull/3495
if STAGE2_BOOTSTRAP == _STAGE2_BOOTSTRAP_SENTINEL:
    _MAIN_SENTINEL = "%main" + "%"
    _main = "%main%"
    # Only accept the fallback when it was substituted with a non-empty value.
    STAGE2_BOOTSTRAP = _main if (_main and _main != _MAIN_SENTINEL) else ""

# Without an entry point there is nothing to run, so fail fast.
if not STAGE2_BOOTSTRAP:
    print("ERROR: %stage2_bootstrap% (or %main%) was not substituted.", file=sys.stderr)
    sys.exit(1)

# runfiles-root-relative path to venv's python interpreter
# Empty string if a venv is not setup.
PYTHON_BINARY = '%python_binary%'

# The path to the actual interpreter that is used.
# Typically PYTHON_BINARY is a symlink pointing to this.
# runfiles-root-relative path, absolute path, or single word.
# Used to create a venv at runtime, or when a venv isn't setup.
PYTHON_BINARY_ACTUAL = "%python_binary_actual%"

# 0 or 1.
# 1 if this bootstrap was created for placement within a zipfile. 0 otherwise.
IS_ZIPFILE = "%is_zipfile%" == "1"

# 0 or 1.
# If 1, then a venv will be created at runtime that replicates what would have
# been the build-time structure.
RECREATE_VENV_AT_RUNTIME = "%recreate_venv_at_runtime%" == "1"

# 0 or 1
# If 1, then the path to python will be resolved by running
# PYTHON_BINARY_ACTUAL to determine the actual underlying interpreter.
RESOLVE_PYTHON_BINARY_AT_RUNTIME = "%resolve_python_binary_at_runtime%" == "1"

# venv-relative path to the site-packages
# e.g. lib/python3.12t/site-packages
VENV_REL_SITE_PACKAGES = "%venv_rel_site_packages%"

WORKSPACE_NAME = "%workspace_name%"

# Target-specific interpreter args, one per line.
# The sentinel is split for the same reason as the stage2 bootstrap sentinel:
# it lets us detect a placeholder that was never substituted.
_INTERPRETER_ARGS_SENTINEL = "%interpreter" + "_args%"
_INTERPRETER_ARGS_RAW = "%interpreter_args%"
INTERPRETER_ARGS = (
    []
    if _INTERPRETER_ARGS_RAW == _INTERPRETER_ARGS_SENTINEL
    # filter(None, ...) drops the empty strings produced by blank lines.
    else list(filter(None, _INTERPRETER_ARGS_RAW.split("\n")))
)

# Extra interpreter args and extraction root supplied by the caller's
# environment.
ADDITIONAL_INTERPRETER_ARGS = os.environ.get("RULES_PYTHON_ADDITIONAL_INTERPRETER_ARGS", "")
EXTRACT_ROOT = os.environ.get("RULES_PYTHON_EXTRACT_ROOT")
def is_running_from_zip():
    """Returns the IS_ZIPFILE flag: True when this bootstrap was built for a zipfile."""
    return IS_ZIPFILE
# Import only the modules required by the mode this bootstrap runs in: regex
# matching for locating an on-disk runfiles tree, or the archive/tempdir
# helpers for extracting one from a zip.
if not is_running_from_zip():
    import re
else:
    import shutil
    import tempfile
    import zipfile
def is_windows():
    """Returns True if running on Windows (i.e. os.name is 'nt')."""
    return os.name == 'nt'
def _parse_win32_version(version):
    """Converts a dotted version string (e.g. '10.0.14393') to a tuple of ints.

    Parsing stops at the first component that is not purely numeric, so an
    unparsable string yields an empty tuple (which compares lower than any
    real version).
    """
    parts = []
    for piece in version.split('.'):
        if not piece.isdigit():
            break
        parts.append(int(piece))
    return tuple(parts)


def get_windows_path_with_unc_prefix(path):
    """Adds UNC prefix after getting a normalized absolute Windows path.

    No-op for non-Windows platforms or if running under python2.

    Args:
        path: (str) The path to convert; surrounding whitespace is stripped.

    Returns:
        The stripped path unchanged when no prefix is needed (non-Windows,
        python 2, Windows build >= 10.0.14393, mingw, or already prefixed);
        otherwise the absolute, normalized path prefixed with `\\\\?\\`.
    """
    path = path.strip()

    # No need to add prefix for non-Windows platforms.
    # And \\?\ doesn't work in python 2 or on mingw
    if not is_windows() or sys.version_info[0] < 3:
        return path

    # Starting in Windows 10, version 1607(OS build 14393), MAX_PATH limitations have been
    # removed from common Win32 file and directory functions.
    # Related doc: https://docs.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=cmd#enable-long-paths-in-windows-10-version-1607-and-later
    import platform

    win32_version = None
    # Windows 2022 with Python 3.12.8 gives flakey errors, so try a couple times.
    for _ in range(3):
        try:
            win32_version = platform.win32_ver()[1]
            break
        except (ValueError, KeyError):
            pass

    # Compare numerically: comparing the raw strings is lexicographic, which
    # wrongly ranks e.g. '6.1.7601' above '10.0.14393'.
    if win32_version and _parse_win32_version(win32_version) >= (10, 0, 14393):
        return path

    # import sysconfig only now to maintain python 2.6 compatibility
    import sysconfig
    if sysconfig.get_platform() == 'mingw':
        return path

    # Lets start the unicode fun
    unicode_prefix = '\\\\?\\'
    if path.startswith(unicode_prefix):
        return path

    # os.path.abspath returns a normalized absolute path
    return unicode_prefix + os.path.abspath(path)
def has_windows_executable_extension(path):
    """Returns True if `path` ends with a Windows executable extension.

    The recognized extensions are .exe, .com and .bat. The comparison ignores
    case because Windows file names are case-insensitive, so `python.EXE`
    counts as already having an executable extension.

    Args:
        path: (str) The file path to inspect.
    """
    return path.lower().endswith(('.exe', '.com', '.bat'))
# On Windows the interpreter path needs an executable extension; append
# '.exe' when a (non-empty) PYTHON_BINARY does not already carry one.
if PYTHON_BINARY and is_windows() and not has_windows_executable_extension(PYTHON_BINARY):
    PYTHON_BINARY += '.exe'
def search_path(name):
    """Looks up `name` in the directories listed in the PATH environment variable.

    Falls back to os.defpath when PATH is unset. Empty PATH entries are
    skipped.

    Args:
        name: (str) The file name to look for.

    Returns:
        The path of the first match that is a regular file and executable, or
        None if there is no match.
    """
    for entry in os.getenv('PATH', os.defpath).split(os.pathsep):
        if not entry:
            continue
        candidate = os.path.join(entry, name)
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate
    return None
def find_python_binary(runfiles_root):
    """Resolves the Python binary to run via find_binary().

    PYTHON_BINARY is used when it is non-empty; otherwise
    PYTHON_BINARY_ACTUAL is used.

    Args:
        runfiles_root: (str) Path to the runfiles root directory.

    Returns:
        Whatever find_binary() returns for the chosen name.
    """
    chosen = PYTHON_BINARY if PYTHON_BINARY else PYTHON_BINARY_ACTUAL
    return find_binary(runfiles_root, chosen)
def print_verbose(*args, mapping=None, values=None):
    """Prints a debug message to stderr when RULES_PYTHON_BOOTSTRAP_VERBOSE is set.

    Every line is prefixed with "bootstrap: stage 1:" followed by `args`.

    Args:
        *args: Values printed on every emitted line.
        mapping: Optional mapping; when not None, one line is printed per
            item (sorted), with "key=repr(value)" appended.
        values: Optional iterable, used only when `mapping` is None; one line
            is printed per element, with "[index] repr(value)" appended.
    """
    if not os.environ.get("RULES_PYTHON_BOOTSTRAP_VERBOSE"):
        return

    prefix = "bootstrap: stage 1:"
    if mapping is not None:
        # Generators keep formatting lazy: each entry is rendered right
        # before its line is printed.
        suffixes = (
            "{}={}".format(key, repr(value))
            for key, value in sorted((mapping or {}).items())
        )
    elif values is not None:
        suffixes = (
            "[{}] {}".format(index, repr(item))
            for index, item in enumerate(values)
        )
    else:
        print(prefix, *args, file=sys.stderr, flush=True)
        return

    for suffix in suffixes:
        print(prefix, *args, suffix, file=sys.stderr, flush=True)
def find_binary(runfiles_root, bin_name):
    """Resolves `bin_name` to the path of the real binary.

    Args:
        runfiles_root: (str) Path to the runfiles root directory.
        bin_name: (str) A label, an absolute path, a runfiles-relative path,
            or a bare program name.

    Returns:
        None for an empty name; the name itself if it is absolute; the name
        joined onto `runfiles_root` if it contains a path separator;
        otherwise the result of looking it up with search_path().

    Raises:
        AssertionError: If `bin_name` is a label (starts with "//").
    """
    if not bin_name:
        return None

    # Case 1: Path is a label. Not supported yet.
    if bin_name.startswith("//"):
        raise AssertionError(
            "Bazel does not support execution of Python interpreters via labels yet"
        )

    # Case 2: Absolute path.
    if os.path.isabs(bin_name):
        return bin_name

    # Case 3: Path is relative to the repo root.
    # Use normpath() to convert slashes to os.sep on Windows.
    if os.sep in os.path.normpath(bin_name):
        return os.path.join(runfiles_root, bin_name)

    # Case 4: Path has to be looked up in the search path.
    return search_path(bin_name)
def find_runfiles_root(main_rel_path):
    """Finds the runfiles tree.

    Args:
        main_rel_path: (str) runfiles-root-relative path of the main entry
            point; used to validate a root taken from the environment.

    Returns:
        The path to the runfiles root directory.

    Raises:
        AssertionError: If no runfiles directory could be located.
    """
    # When the calling process used the runfiles manifest to resolve the
    # location of this stub script, the path may be expanded. This means
    # argv[0] may no longer point to a location inside the runfiles
    # directory. We should therefore respect RUNFILES_DIR and
    # RUNFILES_MANIFEST_FILE set by the caller.
    runfiles_dir = os.environ.get('RUNFILES_DIR', None)
    if not runfiles_dir:
        manifest = os.environ.get('RUNFILES_MANIFEST_FILE', '')
        if manifest.endswith(('.runfiles_manifest', '.runfiles/MANIFEST')):
            # Both "_manifest" and "/MANIFEST" are 9 characters long, so
            # dropping 9 characters leaves the path ending in ".runfiles".
            runfiles_dir = manifest[:-9]

    # Be defensive: the runfiles dir should contain our main entry point. If
    # it doesn't, then it must not be our runfiles directory.
    if runfiles_dir and os.path.exists(os.path.join(runfiles_dir, main_rel_path)):
        return runfiles_dir

    on_windows = is_windows()
    stub_filename = sys.argv[0]
    # On Windows, the path may contain both forward and backslashes.
    # Normalize to the OS separator because the regex used later assumes
    # the OS-specific separator.
    if on_windows:
        stub_filename = stub_filename.replace("/", os.sep)
    if not os.path.isabs(stub_filename):
        stub_filename = os.path.join(os.getcwd(), stub_filename)

    exe_suffix = '.exe' if on_windows else ''
    runfiles_pattern = r'(.*\.runfiles)' + (r'\\' if on_windows else '/') + '.*'

    # Follow the chain of symlinks starting at the stub until one of the
    # candidates sits next to, or inside of, a .runfiles directory.
    while True:
        sibling_root = stub_filename + exe_suffix + '.runfiles'
        if os.path.isdir(sibling_root):
            return sibling_root

        matchobj = re.match(runfiles_pattern, stub_filename)
        if matchobj:
            return matchobj.group(1)

        if not os.path.islink(stub_filename):
            break
        target = os.readlink(stub_filename)
        # A relative link target is relative to the link's own directory.
        stub_filename = (
            target
            if os.path.isabs(target)
            else os.path.join(os.path.dirname(stub_filename), target)
        )

    raise AssertionError('Cannot find .runfiles directory for %s' % sys.argv[0])
def extract_zip(zip_path, dest_dir):
    """Extracts the contents of a zip file, preserving the unix file mode bits.

    These include the permission bits, and in particular, the executable bit.

    Ideally the zipfile module should set these bits, but it doesn't. See:
    https://bugs.python.org/issue15795.

    Entries whose stored mode marks them as symlinks are re-created as
    symlinks, using the extracted file's content as the link target.

    Args:
        zip_path: The path to the zip file to extract
        dest_dir: The path to the destination directory
    """
    zip_path = get_windows_path_with_unc_prefix(zip_path)
    dest_dir = get_windows_path_with_unc_prefix(dest_dir)
    with zipfile.ZipFile(zip_path) as archive:
        for member in archive.infolist():
            archive.extract(member, dest_dir)
            # UNC-prefixed paths must be absolute/normalized. See
            # https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file#maximum-path-length-limitation
            extracted = os.path.abspath(os.path.join(dest_dir, member.filename))
            # The Unix st_mode bits (see "man 7 inode") are stored in the upper 16
            # bits of external_attr.
            mode = member.external_attr >> 16
            # Symlink bit in st_mode is 0o120000.
            is_symlink = (mode & 0o170000) == 0o120000
            if is_symlink:
                # The extracted regular file holds the link target as its content.
                with open(extracted, "r") as f:
                    link_target = f.read()
                os.remove(extracted)
                os.symlink(link_target, extracted)
            elif mode != 0:  # Rumor has it these can be 0 for zips created on Windows.
                # Only the lower 12 bits (the file mode bits) are applied; the
                # file type bits can't be set by chmod anyway.
                os.chmod(extracted, mode & 0o7777)
def create_runfiles_root():
    """Creates the runfiles tree by extracting the zip file.

    The zip (located at the directory name of this file's path) is extracted
    into a fresh temporary directory.

    Returns:
        The path of the 'runfiles' sub-directory inside that temporary
        directory.
    """
    extraction_dir = tempfile.mkdtemp(suffix='', prefix='Bazel.runfiles_')
    extract_zip(os.path.dirname(__file__), extraction_dir)
    # IMPORTANT: Later code does `rm -fr` on dirname(runfiles_root) -- it's
    # important that deletion code be in sync with this directory structure
    return os.path.join(extraction_dir, 'runfiles')
def _create_venv(runfiles_root):
    """Creates a venv at runtime that points back into the runfiles venv.

    The venv is created under EXTRACT_ROOT when that is set, otherwise in a
    fresh temporary directory. It consists of symlinks: `bin/<python>` to the
    actual interpreter, plus `lib`, the site-packages directory and
    `pyvenv.cfg` pointing at their counterparts in the runfiles venv.

    Args:
        runfiles_root: (str) Path to the runfiles root directory.

    Returns:
        A tuple `(cleanup_dir, venv_python_exe)`: `cleanup_dir` is the
        temporary venv directory the caller should delete afterwards (None
        when EXTRACT_ROOT is used), and `venv_python_exe` is the path of the
        interpreter symlink inside the venv.

    Raises:
        AssertionError: If the actual interpreter cannot be found, or if
            resolving it at runtime produces unexpected output.
        subprocess.CalledProcessError: If the interpreter used for runtime
            resolution exits with a non-zero status.
    """
    # Locate the interpreter first so that a missing interpreter is reported
    # before it is used and before any directory is created.
    python_exe_actual = find_binary(runfiles_root, PYTHON_BINARY_ACTUAL)
    if python_exe_actual is None:
        raise AssertionError('Could not find python binary: ' + repr(PYTHON_BINARY_ACTUAL))

    runfiles_venv = join(runfiles_root, dirname(dirname(PYTHON_BINARY)))
    if EXTRACT_ROOT:
        # NOTE(review): os.path.join() discards EXTRACT_ROOT when
        # runfiles_venv is an absolute path -- confirm that is intended.
        venv = join(EXTRACT_ROOT, runfiles_venv)
        os.makedirs(venv, exist_ok=True)
        cleanup_dir = None
    else:
        import tempfile
        venv = tempfile.mkdtemp("", f"bazel.{basename(runfiles_venv)}.")
        cleanup_dir = venv

    # See stage1_bootstrap_template.sh for details on this code path. In short,
    # this handles when the build-time python version doesn't match runtime
    # and if the initially resolved python_exe_actual is a wrapper script.
    if RESOLVE_PYTHON_BINARY_AT_RUNTIME:
        # repr() embeds the venv path as a valid Python string literal even
        # when it contains backslashes or quotes (e.g. Windows paths).
        src = (
            "import sys, site\n"
            "print(sys.executable)\n"
            f"print(site.getsitepackages([{venv!r}])[-1])\n"
        )
        # No shell: with shell=True and an argument list, POSIX passes the
        # extra items ("-I") to the shell instead of to the interpreter.
        output = subprocess.check_output(
            [python_exe_actual, "-I"], encoding="utf8", input=src)
        lines = output.strip().splitlines()
        if len(lines) < 2:
            raise AssertionError(
                'Unexpected output while resolving python binary: ' + repr(output))
        python_exe_actual = lines[0]
        venv_site_packages = lines[1]
        os.makedirs(dirname(venv_site_packages), exist_ok=True)
        runfiles_venv_site_packages = join(runfiles_venv, VENV_REL_SITE_PACKAGES)
    else:
        venv_site_packages = join(venv, "lib")
        runfiles_venv_site_packages = join(runfiles_venv, "lib")

    venv_bin = join(venv, "bin")
    try:
        os.mkdir(venv_bin)
    except FileExistsError:
        # The venv may be reused (e.g. under EXTRACT_ROOT).
        pass

    # Match the basename; some tools, e.g. pyvenv key off the executable name
    venv_python_exe = join(venv_bin, os.path.basename(python_exe_actual))
    _symlink_exist_ok(from_=venv_python_exe, to=python_exe_actual)
    # When `lib` already exists as a real directory (created above for the
    # runtime-resolved site-packages) this first symlink is a no-op.
    _symlink_exist_ok(from_=join(venv, "lib"), to=join(runfiles_venv, "lib"))
    _symlink_exist_ok(from_=venv_site_packages, to=runfiles_venv_site_packages)
    _symlink_exist_ok(from_=join(venv, "pyvenv.cfg"), to=join(runfiles_venv, "pyvenv.cfg"))
    return cleanup_dir, venv_python_exe
def runfiles_envvar(runfiles_root):
    """Finds the runfiles manifest or the runfiles directory.

    Args:
        runfiles_root: (str) Path to the runfiles root directory.

    Returns:
        A tuple of (var_name, var_value) where var_name is either 'RUNFILES_DIR' or
        'RUNFILES_MANIFEST_FILE' and var_value is the path to that directory or
        file, or (None, None) if runfiles couldn't be found.
    """
    # If this binary is the data-dependency of another one, the other sets
    # RUNFILES_MANIFEST_FILE or RUNFILES_DIR for our sake.
    for var_name in ('RUNFILES_MANIFEST_FILE', 'RUNFILES_DIR'):
        inherited = os.environ.get(var_name, None)
        if inherited:
            return (var_name, inherited)

    # If running from a zip, there's no manifest file.
    if is_running_from_zip():
        return ('RUNFILES_DIR', runfiles_root)

    manifest_candidates = (
        # The runfiles "output" manifest, argv[0] + ".runfiles_manifest"
        runfiles_root + '_manifest',
        # The runfiles "input" manifest, argv[0] + ".runfiles/MANIFEST"
        # Normally .runfiles_manifest and MANIFEST are both present, but the
        # former will be missing for zip-based builds or if someone copies the
        # runfiles tree elsewhere.
        os.path.join(runfiles_root, 'MANIFEST'),
    )
    for manifest in manifest_candidates:
        if os.path.exists(manifest):
            return ('RUNFILES_MANIFEST_FILE', manifest)

    # If running in a sandbox and no environment variables are set, then
    # Look for the runfiles next to the binary.
    if runfiles_root.endswith('.runfiles') and os.path.isdir(runfiles_root):
        return ('RUNFILES_DIR', runfiles_root)

    return (None, None)
def execute_file(python_program, main_filename, args, env, runfiles_root,
                 workspace, delete_dirs):
    # type: (str, str, list[str], dict[str, str], str, str|None, list[str]|None) -> ...
    """Executes the given Python file using the various environment settings.

    This will not return, and acts much like os.execv, except is much
    more restricted, and handles Bazel-related edge cases.

    Args:
        python_program: (str) Path to the Python binary to use for execution
        main_filename: (str) The Python file to execute
        args: (list[str]) Additional args to pass to the Python file
        env: (dict[str, str]) A dict of environment variables to set for the execution
        runfiles_root: (str) Path to the runfiles root directory
        workspace: (str|None) Name of the workspace to execute in. This is expected to be a
            directory under the runfiles tree.
        delete_dirs: (list[str]|None) directories that should be deleted after the user
            program has finished running. Empty/None entries are ignored.
    """
    additional_args_envvar = "RULES_PYTHON_ADDITIONAL_INTERPRETER_ARGS"
    additional_interpreter_args = os.environ.pop(additional_args_envvar, "")
    # `env` is a snapshot that may still carry the variable even though it
    # was just removed from os.environ. Strip it from a copy so the program
    # being launched does not inherit it and the caller's dict is untouched.
    env = dict(env)
    env.pop(additional_args_envvar, None)

    # Ignore placeholder entries (e.g. None) so they neither force the
    # subprocess code path nor get passed to rmtree.
    delete_dirs = [d for d in (delete_dirs or ()) if d]

    argv = [python_program]
    argv.extend(INTERPRETER_ARGS)
    if additional_interpreter_args:
        import shlex
        argv.extend(shlex.split(additional_interpreter_args))
    argv.append(main_filename)
    argv.extend(args)

    # We want to use os.execv instead of subprocess.call, which causes
    # problems with signal passing (making it difficult to kill
    # Bazel). However, these conditions force us to run via
    # subprocess.call instead:
    #
    # - On Windows, os.execv doesn't handle arguments with spaces
    #   correctly, and it actually starts a subprocess just like
    #   subprocess.call.
    # - When running in a workspace or zip file, we need to clean up the
    #   workspace after the process finishes so control must return here.
    #
    if not (is_windows() or workspace or delete_dirs):
        _run_execv(python_program, argv, env)

    # Log the environment that is actually handed to the subprocess.
    print_verbose("run: subproc: environ:", mapping=env)
    print_verbose("run: subproc: cwd:", workspace)
    print_verbose("run: subproc: argv:", values=argv)
    try:
        ret_code = subprocess.call(argv, env=env, cwd=workspace)
    finally:
        # Clean up even if waiting for the subprocess is interrupted.
        for delete_dir in delete_dirs:
            print_verbose("rmtree:", delete_dir)
            shutil.rmtree(delete_dir, ignore_errors=True)
    sys.exit(ret_code)
def _run_execv(python_program, argv, env):
    # type: (str, list[str], dict[str, str]) -> ...
    """Replaces the current process with `python_program` via os.execv.

    `env` is merged into os.environ first, since os.execv passes the current
    process environment to the new program.

    Args:
        python_program: (str) Path to the Python binary to execute.
        argv: (list[str]) The full argument vector, including argv[0].
        env: (dict[str, str]) Environment variables to set for the execution.

    Raises:
        OSError: If os.execv fails; the original error is re-raised after a
            best-effort verbose dump of the start of the program file.
    """
    os.environ.update(env)
    print_verbose("RunExecv: environ:", mapping=os.environ)
    print_verbose("RunExecv: python:", python_program)
    print_verbose("RunExecv: argv:", values=argv)
    try:
        os.execv(python_program, argv)
    except OSError:
        # Diagnostics are best-effort: a failure to read the program (for
        # example because it does not exist) must not mask the exec error.
        try:
            with open(python_program, 'rb') as f:
                print_verbose("pyprog head:" + str(f.read(50)))
        except OSError as diag_error:
            print_verbose("pyprog head: unreadable:", diag_error)
        raise
def _symlink_exist_ok(*, from_, to):
try:
os.symlink(to, from_)
except FileExistsError:
pass
def main():
    """Entry point: locates runfiles and the interpreter, then runs stage 2.

    Steps:
      1. Find (or, when running from a zip, extract) the runfiles tree.
      2. Build the environment for the child interpreter (runfiles variable,
         PYTHONSAFEPATH, plus the current environment).
      3. Resolve the Python interpreter, creating a venv at runtime if
         RECREATE_VENV_AT_RUNTIME is set.
      4. Hand over to execute_file(), which does not return.

    Raises:
        AssertionError: If the stage 2 bootstrap file is missing or
            unreadable, or if no Python interpreter can be found.
        EnvironmentError: If launching the interpreter fails; the error's
            `filename` is filled in with the interpreter path when unset.
    """
    print_verbose("sys.version:", sys.version)
    print_verbose("initial argv:", values=sys.argv)
    print_verbose("initial cwd:", os.getcwd())
    print_verbose("initial environ:", mapping=os.environ)
    print_verbose("initial sys.path:", values=sys.path)

    print_verbose("STAGE2_BOOTSTRAP:", STAGE2_BOOTSTRAP)
    print_verbose("PYTHON_BINARY:", PYTHON_BINARY)
    print_verbose("PYTHON_BINARY_ACTUAL:", PYTHON_BINARY_ACTUAL)
    print_verbose("IS_ZIPFILE:", IS_ZIPFILE)
    print_verbose("RECREATE_VENV_AT_RUNTIME:", RECREATE_VENV_AT_RUNTIME)
    print_verbose("WORKSPACE_NAME :", WORKSPACE_NAME )
    print_verbose("bootstrap sys.executable:", sys.executable)
    # sys._base_executable is a private attribute that not every interpreter
    # provides; don't let a debug message crash the bootstrap.
    print_verbose("bootstrap sys._base_executable:",
                  getattr(sys, "_base_executable", None))
    print_verbose("bootstrap sys.version:", sys.version)

    args = sys.argv[1:]

    new_env = {}

    # NOTE: We call normpath for two reasons:
    # 1. Transform Bazel `foo/bar` to Windows `foo\bar`
    # 2. Transform `_main/../foo/main.py` to simply `foo/main.py`, which
    #    matters if `_main` doesn't exist (which can occur if a binary
    #    is packaged and needs no artifacts from the main repo)
    main_rel_path = os.path.normpath(STAGE2_BOOTSTRAP)
    print_verbose("main_rel_path:", main_rel_path)

    delete_dirs = []

    if is_running_from_zip():
        runfiles_root = create_runfiles_root()
        # NOTE: dirname() is called because create_runfiles_root() creates a
        # sub-directory within a temporary directory, and we want to remove the
        # whole temporary directory.
        delete_dirs.append(dirname(runfiles_root))
    else:
        runfiles_root = find_runfiles_root(main_rel_path)

    print_verbose("runfiles root:", runfiles_root)

    if os.environ.get("RULES_PYTHON_TESTING_TELL_RUNFILES_ROOT"):
        new_env["RULES_PYTHON_TESTING_RUNFILES_ROOT"] = runfiles_root

    runfiles_envkey, runfiles_envvalue = runfiles_envvar(runfiles_root)
    if runfiles_envkey:
        new_env[runfiles_envkey] = runfiles_envvalue

    # Don't prepend a potentially unsafe path to sys.path
    # See: https://docs.python.org/3.11/using/cmdline.html#envvar-PYTHONSAFEPATH
    new_env['PYTHONSAFEPATH'] = '1'

    main_filename = os.path.join(runfiles_root, main_rel_path)
    main_filename = get_windows_path_with_unc_prefix(main_filename)
    # Raise explicitly instead of using `assert`, which is stripped when the
    # interpreter runs with -O.
    if not os.path.exists(main_filename):
        raise AssertionError('Cannot exec() %r: file not found.' % main_filename)
    if not os.access(main_filename, os.R_OK):
        raise AssertionError('Cannot exec() %r: file not readable.' % main_filename)

    python_program = find_python_binary(runfiles_root)
    if python_program is None:
        raise AssertionError("Could not find python binary: {} or {}".format(
            repr(PYTHON_BINARY),
            repr(PYTHON_BINARY_ACTUAL)
        ))

    if RECREATE_VENV_AT_RUNTIME:
        # When the venv is created at runtime, python_program is PYTHON_BINARY_ACTUAL
        # so we have to re-point it to the symlink in the venv
        venv_cleanup_dir, python_program = _create_venv(runfiles_root)
        # No cleanup dir is returned when the venv does not live in a
        # temporary directory; registering None would needlessly force the
        # subprocess code path in execute_file().
        if venv_cleanup_dir:
            delete_dirs.append(venv_cleanup_dir)

    # Some older Python versions on macOS (namely Python 3.7) may unintentionally
    # leave this environment variable set after starting the interpreter, which
    # causes problems with Python subprocesses correctly locating sys.executable,
    # which subsequently causes failure to launch on Python 3.11 and later.
    os.environ.pop('__PYVENV_LAUNCHER__', None)

    # Values already placed in new_env take precedence over the inherited
    # environment.
    new_env.update((key, val) for key, val in os.environ.items() if key not in new_env)

    workspace = None
    if is_running_from_zip():
        # If RUN_UNDER_RUNFILES equals 1, it means we need to
        # change directory to the right runfiles directory.
        # (So that the data files are accessible)
        if os.environ.get('RUN_UNDER_RUNFILES') == '1':
            workspace = os.path.join(runfiles_root, WORKSPACE_NAME)

    try:
        sys.stdout.flush()
        # NOTE: execute_file may call execve() and lines after this will never run.
        execute_file(
            python_program, main_filename, args, new_env, runfiles_root,
            workspace,
            delete_dirs = delete_dirs,
        )
    except EnvironmentError as e:
        # This exception occurs when os.execv() fails for some reason.
        if not getattr(e, 'filename', None):
            e.filename = python_program  # Add info to error message
        raise
# Run the bootstrap only when executed as a script, not when imported.
if __name__ == '__main__':
    main()