# Copyright 2023 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""This module provides tools for interacting with Bazel.
It defines two main classes:
BazelRunner: Handles the configuration and execution of Bazel commands.
- Ensures Bazelisk is available or uses a CIPD-defined Bazel version.
- Manages Bazel repository overrides based on the checkout context.
- Constructs Bazel command lines with appropriate arguments for remote
execution (RBE), remote caching, UI settings, and error handling.
- Can execute "programs" defined in a configuration file (e.g.,
pigweed.json), which are sets of Bazel commands. These are run via a
wrapper script that can extract ResultStore links.
- Supports continuing builds on error and downloading all artifacts.
BazelApi: A recipe module API that provides a factory method (`new_runner`)
to create instances of `BazelRunner`. This allows recipes to easily
configure and run Bazel builds and tests.
The module integrates with other recipe systems like CIPD for Bazel
installation, Buildbucket for build information (e.g., RBE instance names),
and Checkout for repository context.
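
Example (hypothetical recipe code; assumes `checkout` and `options` have
been constructed elsewhere in the recipe):

    runner = api.bazel.new_runner(checkout, options)
    runner.run()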
"""
from __future__ import annotations
import contextlib
import dataclasses
import json
import re
import shlex
from collections.abc import Sequence
from typing import TYPE_CHECKING
from recipe_engine import config_types, engine_types, recipe_api, util
from PB.recipe_engine import result as result_pb
from PB.recipe_modules.pigweed.bazel.options import Options
from RECIPE_MODULES.pigweed.gcs_upload import api as gcs_upload_api
from RECIPE_MODULES.recipe_engine.defer import api as defer_api
from RECIPE_MODULES.recipe_engine.futures import api as futures_api
if TYPE_CHECKING: # pragma: no cover
from RECIPE_MODULES.pigweed.checkout import api as checkout_api
DEFAULT_BAZELISK_VERSION = 'latest'
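# Note: Bazelisk is only a launcher; the Bazel version it actually runs is
# chosen by the checkout (e.g., via a .bazelversion file). The 'latest'
# above is the CIPD ref used to fetch the Bazelisk binary itself.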
def _ensure_bazelisk(
api: recipe_api.RecipeApi,
    cache: dict[str, config_types.Path],
version: str,
) -> config_types.Path:
if version in cache:
return cache[version]
ensure_file = api.cipd.EnsureFile()
ensure_file.add_package(
'fuchsia/third_party/bazelisk/${platform}',
version,
)
root = api.path.mkdtemp()
api.cipd.ensure(root, ensure_file, name='ensure bazelisk')
bazelisk = root / 'bazelisk'
cache[version] = bazelisk
return bazelisk
def _rbe_arguments(
api: recipe_api.RecipeApi,
remote: bool = True,
remote_cache: bool = True,
upload_local_results: bool = True,
) -> list[str]:
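    """Return Bazel flags enabling remote execution and/or remote caching.

    A sketch of the flags produced for a Linux tryjob in the 'pigweed'
    project with the defaults (remote, remote_cache, and
    upload_local_results all True):

        --config=remote
        --bes_instance_name=pigweed-rbe-open-pre
        --remote_instance_name=projects/pigweed-rbe-open-pre/instances/default-instance
        --remote_upload_local_results=true
    """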
build = api.buildbucket.build
if 'pigweed.disable_rbe' in build.input.experiments:
return []
# If we're disabling GitHub, we should disable RBE as well. Otherwise we'll
# get cache hits for stuff we shouldn't have access to.
if 'pigweed.disable_github' in build.input.experiments:
return []
rbe_args: list[str] = []
if remote:
        # TODO: b/368128573 - Support remote execution on macOS.
if api.platform.is_linux:
rbe_args.append('--config=remote')
else:
api.step.empty('ignoring remote because not running on Linux')
elif remote_cache:
# --config=remote already implies --config=remote_cache.
rbe_args.append('--config=remote_cache')
if build.builder.project == 'pigweed':
instance_name = 'pigweed-rbe-open'
else:
instance_name = 'pigweed-rbe-private'
if api.buildbucket_util.is_tryjob:
instance_name += '-pre'
rbe_args.append(f'--bes_instance_name={instance_name}')
rbe_args.append(
f'--remote_instance_name=projects/{instance_name}/instances/'
'default-instance'
)
if upload_local_results:
if not remote_cache:
api.step.empty(
'ignoring upload_local_results since remote_cache is False'
)
else:
rbe_args.append('--remote_upload_local_results=true')
return rbe_args
def _override_arguments(
*,
api: recipe_api.RecipeApi,
checkout: checkout_api.CheckoutContext,
) -> list[str]:
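    """Return repository-override flags for the checkout.

    A sketch with a hypothetical mapping: bazel_overrides of
    {'pigweed': '/checkout/pigweed'} in a non-bzlmod workspace yields
    ['--override_repository=pigweed=/checkout/pigweed'].
    """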
if api.path.exists(checkout.root / 'MODULE.bazel'):
# We're in a bzlmod-managed workspace.
flag = '--override_module' # pragma: no cover
else:
# We're in a traditional workspace.
flag = '--override_repository'
return [
f'{flag}={repo}={path}'
for repo, path in checkout.bazel_overrides.items()
]
@dataclasses.dataclass
class BazelRunner:
_api: recipe_api.RecipeApi
checkout: checkout_api.CheckoutContext
options: Options
    _bazelisk_cache: dict[str, config_types.Path]
_bazel: config_types.Path | None = None
continue_after_build_error: bool = False
download_all_artifacts: bool = False
_base_args: Sequence[str] = ()
def ensure_bazelisk(self) -> config_types.Path:
return _ensure_bazelisk(
self._api,
self._bazelisk_cache,
self.options.bazelisk_version or DEFAULT_BAZELISK_VERSION,
)
def _override_arguments(self) -> list[str]:
return _override_arguments(api=self._api, checkout=self.checkout)
    def info(
        self, explain_log: config_types.Path
    ) -> dict[str, str | config_types.Path]:
        """Extract metadata from the most recent build, using explain.log.

        Use the first line to get the full, expanded argument list. Strip
        out any '--config=...' arguments (these have already been expanded
        but remain in the log). Then run 'bazel info' with the stripped
        args and process the output.
        """
self._api.path.mock_add_file(explain_log)
if not self._api.path.isfile(explain_log):
self._api.step.empty('explain.log not found') # pragma: no cover
return {} # pragma: no cover
line = self._api.file.read_text(
'read explain.log',
explain_log,
test_data='Build options: --foo --bar --config=ham --spam --eggs',
).splitlines()[0]
build_options = 'Build options: '
assert line.startswith(build_options)
args: list[str] = []
for arg in shlex.split(line.removeprefix(build_options)):
if not arg.startswith('--config='):
args.append(arg)
        def test_data() -> str:
            test_data: dict[str, object] = {}
test_data['output_base'] = self._api.path.home_dir / '.cache/abc123'
test_data['output_path'] = test_data['output_base'] / 'execroot'
test_data['bazel-out'] = test_data['output_path'] / 'bazel-out'
test_data['bazel-bin'] = test_data['bazel-out'] / 'bazel-bin'
# For coverage of something that doesn't look like a Path.
test_data['gc-count'] = 11
return ''.join(f'{k}: {v}\n' for k, v in test_data.items())
step = self._api.step(
'info',
[self.ensure_bazelisk(), 'info', *args],
stdout=self._api.raw_io.output_text(add_output_log=True),
stderr=self._api.raw_io.output_text(add_output_log=True),
step_test_data=lambda: self._api.raw_io.test_api.stream_output_text(
test_data()
),
raise_on_failure=False,
)
        # Carve out one specific failure mode: if 'bazel info' failed only
        # because a repository named in the expanded args is not defined,
        # continue without failing the build.
        # TODO: b/439064080 - Remove this carve-out and the
        # raise_on_failure=False argument above.
if step.exc_result.retcode:
if not re.search(
r"The repository '([\w@_-]+)' could not be resolved: "
r"Repository '\1' is not defined",
step.stderr,
): # pragma: no cover
raising = self._api.step.empty('raising')
raising.presentation.step_summary_text = step.stderr
self._api.step.raise_on_failure(step)
result = {}
for line in step.stdout.splitlines():
name, value = line.strip().split(': ', 1)
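            # Values that parse as absolute paths become Path objects;
            # everything else (e.g. 'gc-count') stays a plain string.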
try:
result[name] = self._api.path.abs_to_path(value)
except ValueError:
result[name] = value
return result
def _read_config(self, *, name: str, path: config_types.Path) -> dict:
config = {}
self._api.path.mock_add_file(path)
if self._api.path.isfile(path):
config = self._api.file.read_json(
f'read {name}',
path,
test_data={
'pw': {
'bazel_presubmit': {
'remote_cache': True,
'upload_local_results': True,
'programs': {
'default': [
['build', '//...'],
['test', '//...'],
],
},
},
},
},
)
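        # Accept either a full pigweed.json (settings nested under
        # pw.bazel_presubmit) or a bare bazel_presubmit-style dict.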
config = config.get('pw', config).get('bazel_presubmit', config)
return config
def _populate_base_args(self, config: dict) -> None:
base_args: list[str] = []
# Don't limit the amount Bazel will write to stdout/stderr.
base_args.append('--experimental_ui_max_stdouterr_bytes=-1')
        # Unless specifically requested, don't download remote build outputs
        # to the local machine, since we won't use them.
if self.download_all_artifacts:
base_args.append('--remote_download_outputs=all')
else:
base_args.append('--remote_download_outputs=minimal')
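        # Missing config keys come through as None (falsy), so remote
        # execution, caching, and upload are effectively opt-in here
        # regardless of _rbe_arguments' True defaults.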
base_args.extend(
_rbe_arguments(
self._api,
remote=config.get('remote'),
remote_cache=config.get('remote_cache'),
upload_local_results=config.get('upload_local_results'),
),
)
base_args.extend(self._override_arguments())
if self.continue_after_build_error:
base_args.append('--keep_going')
self._base_args = tuple(base_args)
def _run_bazel(
self,
*,
defer: defer_api.DeferContext,
cmd: Sequence[str | config_types.Path | util.Placeholder],
pres: engine_types.StepPresentation,
    ) -> dict[str, str | config_types.Path]:
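        """Run a Bazel command and collect 'bazel info' metadata.

        After the command finishes, metadata from the explain log is used to
        locate the output base so its logs can be saved.
        """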
with self._api.default_timeout():
temp = self._api.path.mkdtemp()
explain_log = temp / 'explain.log'
args = list(cmd)
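            # Only pass --explain to invocations that actually build
            # ('build', 'test'); other commands don't write an explain log.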
if 'build' in cmd or 'test' in cmd:
args.append(f'--explain={explain_log}')
bazel_result = defer(self._api.step, 'bazel', args)
info_result = defer(self.info, explain_log)
if info_result.is_ok():
info = info_result.result()
dirs = []
if 'output_base' in info:
dirs.append(info['output_base'])
if 'output_path' in info:
dirs.append(info['output_path'])
defer(
self._api.save_logs,
dirs=dirs,
pres=pres,
step_passed=bazel_result.is_ok(),
step_name='bazel',
)
return info_result.result()
return {} # pragma: no cover
def _extract_resultstore_link(
self,
json_path: config_types.Path | util.Placeholder,
future: futures_api.Future,
bazel_pres: engine_types.StepPresentation,
) -> None:
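        """Poll the Build Event Protocol JSON file for a ResultStore link.

        The BEP 'started' event carries the invocation UUID, from which the
        ResultStore URL is derived and attached to the step presentations.
        """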
# Ensure the bazel step shows up before the resultstore link step.
self._api.time.sleep(1)
        def read_json(
            i: int, path: config_types.Path | util.Placeholder
        ) -> bool:
if not self._api.path.isfile(path):
return False
data = self._api.file.read_text(
f'read {i}',
path,
test_data='\n'.join(
(
'{"id":{"buildMetadata":{}},"buildMetadata":{}}',
(
'{"id":{"started":{}},"started": '
'{"uuid":"abc-def-123-456-789"}}'
),
),
),
)
results_uuid = None
for line in data.splitlines():
event = json.loads(line)
if 'started' not in event:
continue
if 'uuid' not in event['started']:
raise self._api.step.InfraFailure( # pragma: no cover
'UUID missing from Bazel Build '
'Event Protocol `started` message'
)
results_uuid = event['started']['uuid']
break
if not results_uuid:
return False # pragma: no cover
results_url = (
'https://source.cloud.google.com/results/invocations/'
f'{results_uuid}'
)
pres.links['resultstore'] = results_url
bazel_pres.links['resultstore'] = results_url
pres.step_summary_text = ''
return True
found_resultstore_link = False
with self._api.step.nest('resultstore link') as pres:
pres.step_summary_text = 'link not found'
count = 5
for i in range(1, count):
self._api.time.sleep(i)
if i > 1:
self._api.path.mock_add_file(json_path)
if read_json(i, json_path):
found_resultstore_link = True
break
if future.done:
break # pragma: no cover
_ = future.result()
if not found_resultstore_link:
read_json(count, json_path) # pragma: no cover
def _upload_artifacts(
self,
gcs: gcs_upload_api.GcsUploadContext,
step: str,
        info: dict[str, str | config_types.Path],
) -> None:
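        """Upload files matching the configured patterns from bazel-bin.

        Matches for options.patterns_of_files_to_upload are globbed under
        the build's bazel-bin directory and uploaded to GCS beneath the
        given step subdirectory.
        """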
if 'bazel-bin' not in info: # pragma: no cover
self._api.step.empty('no bazel-bin metadata')
return
bazel_bin = info['bazel-bin']
to_upload: list[config_types.Path] = []
with self._api.step.nest('glob'):
for pat in self.options.patterns_of_files_to_upload:
                test_data: list[str] = []
                # If the pattern contains no glob metacharacters (i.e., it
                # names a literal file rather than a pattern), pretend that
                # file exists in testing.
if not set('*?[]') & set(pat):
test_data.append(pat)
self._api.path.mock_add_file(bazel_bin / pat)
to_upload.extend(
self._api.file.glob_paths(
name=pat,
source=bazel_bin,
pattern=pat,
include_hidden=True,
test_data=test_data,
),
)
with self._api.gcs_upload(parent=gcs, subdirectory=step) as step_gcs:
for entry in to_upload:
step_gcs.upload(
entry,
self._api.path.relpath(entry, bazel_bin),
)
def _run_step(
self,
*,
args: Sequence[str | config_types.Path | util.Placeholder],
defer: defer_api.DeferContext,
gcs: gcs_upload_api.GcsUploadContext | None = None,
):
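        """Run a single Bazel command from a program.

        The assembled command line is, roughly:

            bazelisk <args> --build_event_json_file <bep.json>
                <base args> <options.extra_args>
        """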
json_path = self._api.path.mkdtemp() / 'bep.json'
cmd = [
self.ensure_bazelisk(),
*args,
'--build_event_json_file',
self._api.json.output(leak_to=json_path),
*self._base_args,
*self.options.extra_args,
]
        name = shlex.join(['bazelisk', *args])
with self._api.step.nest(name) as bazel_pres:
future = self._api.futures.spawn(
self._run_bazel,
cmd=cmd,
pres=bazel_pres,
defer=defer,
)
self._extract_resultstore_link(
json_path=json_path,
future=future,
bazel_pres=bazel_pres,
)
info = future.result()
if gcs:
# Replace things like "//..." with "_____" so they don't confuse
# people reading them as paths.
name = re.sub(
r'\.\.+',
lambda m: '_' * len(m.group(0)),
name,
)
name = name.replace('/', '_')
self._upload_artifacts(gcs, step=name, info=info)
def run(self) -> result_pb.RawResult | None:
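        """Read the Bazel config and run each requested program.

        Returns a RawResult describing uploaded artifacts when a GCS bucket
        is configured, and None otherwise.
        """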
config_name = self.options.config_path or 'pigweed.json'
config = self._read_config(
name=config_name,
path=self.checkout.root / config_name,
)
self._populate_base_args(config)
with contextlib.ExitStack() as stack:
stack.enter_context(self._api.context(cwd=self.checkout.root))
defer = stack.enter_context(self._api.defer.context())
gcs: gcs_upload_api.GcsUploadContext | None = None
if self.options.gcs_bucket:
gcs = stack.enter_context(
self._api.gcs_upload(gcs_bucket=self.options.gcs_bucket),
)
programs = config.get('programs', {})
for program in self.options.program or ('default',):
                with contextlib.ExitStack() as program_stack:
                    program_stack.enter_context(self._api.step.nest(program))
program_gcs: gcs_upload_api.GcsUploadContext | None = None
if gcs:
                        program_gcs = program_stack.enter_context(
self._api.gcs_upload(
parent=gcs,
subdirectory=program,
),
)
if program not in programs:
raise self._api.step.InfraFailure( # pragma: no cover
f'{program} not in {programs.keys()}'
)
if not programs[program]:
raise self._api.step.InfraFailure( # pragma: no cover
f'{program} is empty'
)
for args in programs[program]:
self._run_step(args=args, defer=defer, gcs=program_gcs)
if gcs:
return gcs.raw_result()
class BazelApi(recipe_api.RecipeApi):
"""Bazel utilities."""
BazelRunner = BazelRunner
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
        self._bazelisk_cache: dict[str, config_types.Path] = {}
def new_runner(
self,
checkout: checkout_api.CheckoutContext,
options: Options,
*,
continue_after_build_error: bool = False,
download_all_artifacts: bool = False,
) -> BazelRunner:
"""Creates a new BazelRunner.
Args:
checkout: The checkout to run bazel in.
options: The bazel options proto.
            continue_after_build_error: If True, don't stop the build if a
                Bazel command fails.
            download_all_artifacts: If True, download all remote build
                outputs.
"""
return BazelRunner(
_api=self.m,
checkout=checkout,
options=options,
continue_after_build_error=continue_after_build_error,
download_all_artifacts=download_all_artifacts,
_bazelisk_cache=self._bazelisk_cache,
)
def ensure_bazelisk(
self,
version: str = DEFAULT_BAZELISK_VERSION,
) -> config_types.Path:
return _ensure_bazelisk(
self.m,
self._bazelisk_cache,
version,
)
def rbe_arguments(
self,
remote: bool = True,
remote_cache: bool = True,
upload_local_results: bool = True,
) -> list[str]:
# The next line is covered by workflows recipe tests.
return _rbe_arguments( # pragma: no cover
self.m,
remote=remote,
remote_cache=remote_cache,
upload_local_results=upload_local_results,
)
def override_arguments(
self,
checkout: checkout_api.CheckoutContext,
) -> list[str]:
return _override_arguments(api=self.m, checkout=checkout)