blob: 4d9a584e572ebb77ff310430f8da52b5ed2e0af0 [file]
# Copyright 2025 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Bisect build failures without storing state.
Look at builders that track a specific repository. Look at builds of those
builders by the commits that triggered them (so "most recent" refers not to when
the build ran but when the commit used to trigger the build was submitted). If
the most recent builds failed but less recent builds passed, trigger a build
with a commit in between the oldest failure and the most recent pass.
If there's already a build running (or scheduled) with that commit, do nothing.
Once that running build completes, the next bisector run will have different
parameters and will take the next step in the bisection.
Use buildbucket instead of luci-scheduler to trigger builds so bisection builds
won't get combined with gitiles poller-triggered builds. (Luci-scheduler will
take many triggers that happened in a short window and combine them to trigger
just one build, so bisector builders might be combined with a build created
because of a newly submitted change.)
This is related to the rerunner recipe, but has a different purpose. Rerunner
builders make builders green when they fail because of a flake, or confirm that
the failure is not a flake. Bisector builders attribute a new non-flake failure
to a specific commit.
"""
from __future__ import annotations
import collections
import dataclasses
import fnmatch
import json
from typing import TYPE_CHECKING
from PB.recipe_engine import result as result_pb
from PB.go.chromium.org.luci.buildbucket.proto import (
build as build_pb,
builds_service as builds_service_pb,
common as common_pb,
project_config as bb_pb,
)
from PB.recipes.pigweed.bisector import InputProperties
from PB.recipe_modules.pigweed.checkout.options import (
Options as CheckoutOptions,
)
from PB.recipe_engine import result
from recipe_engine import post_process
if TYPE_CHECKING: # pragma: no cover
from typing import Generator, Sequence
from recipe_engine import recipe_api, recipe_test_api
# Recipe modules this recipe depends on; resolved by the recipe engine at
# load time and exposed as attributes of the `api` object in RunSteps.
DEPS = [
    'fuchsia/buildbucket_util',
    'fuchsia/builder_status',
    'fuchsia/gerrit',
    'fuchsia/git',
    'pigweed/checkout',
    'recipe_engine/buildbucket',
    'recipe_engine/context',
    'recipe_engine/luci_config',
    'recipe_engine/path',
    'recipe_engine/properties',
    'recipe_engine/step',
    'recipe_engine/time',
]
# Input properties proto for this recipe (defined in bisector's proto file).
PROPERTIES = InputProperties
def include_bucket(props: InputProperties, bucket: str) -> bool:
    """Return True if builders in `bucket` should be considered for bisection.

    Exclusion patterns win over inclusion patterns. If neither list is set,
    default to the conventional CI buckets ('*.ci' and 'ci').

    Args:
        props: Recipe input properties carrying included_buckets and
            excluded_buckets fnmatch-style pattern lists.
        bucket: Name of the buildbucket bucket to evaluate.

    Returns:
        True if the bucket matches an inclusion pattern and no exclusion
        pattern; False otherwise.
    """
    included = list(props.included_buckets)
    excluded = list(props.excluded_buckets)
    # Previously the defaults were appended directly into the props proto,
    # mutating the caller's object as a side effect. Compute them locally
    # instead; the returned values are identical for every call sequence.
    if not included and not excluded:
        included = ['*.ci', 'ci']
    # Exclusions take precedence over inclusions.
    for pattern in excluded:
        if fnmatch.fnmatch(bucket, pattern):
            return False
    for pattern in included:
        if fnmatch.fnmatch(bucket, pattern):
            return True
    return False
@dataclasses.dataclass(frozen=True)
class Remote:
    """A git remote identity (URL plus branch).

    Frozen (hashable) so instances can serve as keys in the per-repository
    commit-log cache built in RunSteps.
    """

    url: str  # remote URL, e.g. 'https://pigweed.googlesource.com/pigweed/pigweed'
    branch: str  # branch name without 'refs/heads/' prefix, e.g. 'main'
@dataclasses.dataclass
class Trigger:
    """Parameters for one bisection build to schedule via buildbucket."""

    bucket: str  # buildbucket bucket name, e.g. 'ci'
    builder: str  # builder name within the bucket
    remote: str  # git remote URL of the repository being bisected
    ref: str  # branch name without 'refs/heads/' prefix
    commit: str  # 40-char commit id to build
def commit40(n: int) -> str:
    """Return a deterministic fake 40-character commit id derived from n.

    Used to generate recognizable git-log test data: the id starts with 'c'
    followed by the zero-padded decimal value of n repeated.
    """
    assert isinstance(n, int)
    repeated_digits = f'{n:06d}' * 7  # at least 42 characters
    return ('c' + repeated_digits)[:40]
def RunSteps(api: recipe_api.RecipeApi, props: InputProperties):
    """Bisect recent failures of every tracked builder.

    For each builder in an included bucket, examine its recent builds keyed
    by the commit that triggered them. If the newest builds failed but an
    older one passed, schedule one build of the commit halfway between the
    oldest failure and the newest pass. No state is stored by this recipe:
    the next run sees the newly scheduled build's result and takes the next
    bisection step.

    Args:
        api: The recipe API object (modules listed in DEPS).
        props: Input properties (bucket filters, dry_run, num_builds).

    Returns:
        A RawResult summarizing what was (or would have been) launched.
    """
    # Tryjobs testing this recipe must never schedule real builds.
    props.dry_run = props.dry_run or api.buildbucket_util.is_tryjob
    bb_cfg: bb_pb.BuildbucketCfg = api.luci_config.buildbucket()
    # Holds Trigger objects (the annotation previously — and incorrectly —
    # claimed list[tuple[str, str]]).
    builds_to_launch: list[Trigger] = []
    # Commit log (newest first) cached per remote so each repository is
    # cloned and logged at most once.
    repos: dict[Remote, tuple[str, ...]] = {}
    for bucket in bb_cfg.buckets:
        if not include_bucket(props, bucket.name):
            if bucket.swarming.builders:
                api.step(
                    f'excluding {len(bucket.swarming.builders)} builders in '
                    f'bucket {bucket.name}',
                    None,
                )
            continue
        with api.step.nest(bucket.name) as pres:
            included = excluded = 0
            for builder in bucket.swarming.builders:
                num_builds: int = props.num_builds or 15
                with api.step.nest(builder.name):
                    if not builder.properties:
                        api.step.empty('no properties')
                        excluded += 1
                        continue
                    build_props = json.loads(builder.properties)
                    if build_props.get('do_not_rerun'):
                        api.step.empty('do not rerun')
                        excluded += 1
                        continue
                    build_props.setdefault('checkout_options', {})
                    checkout_options = build_props['checkout_options']
                    if checkout_options.get('use_repo', False):
                        # repo-based checkouts don't have one linear history
                        # to bisect.
                        api.step.empty('repo-based, not bisecting')
                        excluded += 1
                        continue
                    remote: Remote | None = None
                    if 'remote' in checkout_options:
                        remote = Remote(
                            url=checkout_options['remote'],
                            branch=checkout_options.get('branch', 'main'),
                        )
                    if not remote or not remote.url:
                        no_pres = api.step.empty('no remote').presentation
                        no_pres.step_summary_text = str(builder.properties)
                        excluded += 1
                        continue
                    if remote not in repos:
                        root = api.path.mkdtemp()
                        checkout = api.checkout(
                            CheckoutOptions(
                                remote=remote.url,
                                branch=remote.branch,
                                initialize_submodules=False,
                            ),
                            root=root / 'checkout',
                        )
                        with api.context(cwd=checkout.root):
                            repos[remote] = tuple(
                                api.git.log(
                                    depth=1000,
                                    fmt='format:%H',
                                    test_data='\n'.join(
                                        commit40(x) for x in range(1000)
                                    ),
                                ).stdout.splitlines()
                            )
                    included += 1
                    # Don't DoS buildbucket. (And there's no need for this
                    # builder to run quickly.)
                    api.time.sleep(0.1)
                    # Annotation removed: the previous `status: BuilderStatus`
                    # referenced a name never imported in this file.
                    status = api.builder_status.retrieve(
                        bucket=bucket.name, builder=builder.name, n=num_builds
                    )
                    # If the builder hasn't recently passed, we don't have a
                    # known good commit and can't bisect.
                    if not api.builder_status.has_recently_passed(status):
                        api.step.empty('no recent passes')
                        continue
                    # If five or more of the recent builds are infra failures,
                    # don't trigger more builds.
                    infra_failures = [
                        x
                        for x in status.builds
                        if x.status == common_pb.INFRA_FAILURE
                    ]
                    if len(infra_failures) >= 5:
                        api.step.empty('too many infra failures')
                        continue
                    # Group completed build statuses by triggering commit.
                    statuses: dict[str, list[common_pb.Status]] = {}
                    for build in status.builds:
                        if (
                            build.status & common_pb.ENDED_MASK
                            and build.input.gitiles_commit
                        ):
                            commit = build.input.gitiles_commit.id
                            statuses.setdefault(commit, [])
                            statuses[commit].append(build.status)
                    # (Comprehension variable renamed from 'status', which
                    # shadowed the builder status above.)
                    passes = {
                        commit
                        for commit, commit_statuses in statuses.items()
                        if common_pb.SUCCESS in commit_statuses
                    }
                    failures = {
                        commit
                        for commit, commit_statuses in statuses.items()
                        if common_pb.FAILURE in commit_statuses
                    }
                    # repos[remote] is newest-first, so the first passing
                    # commit found is the newest pass, and any failures seen
                    # before it are newer than that pass.
                    oldest_failure = None
                    newest_passing = None
                    for i, commit in enumerate(repos[remote]):
                        if commit in passes:
                            newest_passing = i
                            break
                        if commit in failures:
                            oldest_failure = i
                    if oldest_failure is None or newest_passing is None:
                        need = api.step.empty('no need to bisect').presentation
                        need.step_summary_text = repr(
                            dict(
                                oldest_failure=oldest_failure,
                                newest_passing=newest_passing,
                            )
                        )
                        continue
                    # Midpoint between the oldest failure and the newest pass.
                    i = oldest_failure + (newest_passing - oldest_failure) // 2
                    if i in (newest_passing, oldest_failure):
                        # No commits in between: the failure is already
                        # attributed to a single commit.
                        api.step.empty('already attributed')
                        continue
                    with api.step.nest('picked') as picked_pres:
                        commit = repos[remote][i]
                        api.step.empty(str(i))
                        picked_pres.logs[commit[0:7]] = (
                            f'{remote.url}/+/{commit}'
                        )
                        api.step.empty(commit)
                    # If a build of this commit is already pending or
                    # running, let it finish instead of scheduling another.
                    already_scheduled = False
                    for build in status.builds:
                        if (
                            build.status
                            in (
                                common_pb.SCHEDULED,
                                common_pb.STARTED,
                            )
                            and build.input.gitiles_commit
                            and build.input.gitiles_commit.id == commit
                        ):
                            api.step.empty('already scheduled')
                            already_scheduled = True
                            break
                    if already_scheduled:
                        continue
                    api.step.empty('scheduling')
                    builds_to_launch.append(
                        Trigger(
                            bucket=bucket.name,
                            builder=builder.name,
                            remote=remote.url,
                            ref=remote.branch,
                            commit=commit,
                        )
                    )
            pres.step_summary_text = f'included {included}, excluded {excluded}'
            # These don't help users much but are useful for testing.
            api.step.empty(f'included {included}')
            api.step.empty(f'excluded {excluded}')
    if not builds_to_launch:
        api.step('nothing to launch', None)
        return result_pb.RawResult(
            summary_markdown='nothing to launch',
            status=common_pb.SUCCESS,
        )
    bb_requests: list[builds_service_pb.ScheduleBuildRequest] = []
    for trigger in builds_to_launch:
        host = api.gerrit.host_from_remote_url(trigger.remote)
        # Convert the gerrit review host to the matching gitiles host.
        host = host.replace('-review.', '.')
        bb_requests.append(
            api.buildbucket.schedule_request(
                bucket=trigger.bucket,
                builder=trigger.builder,
                gitiles_commit=common_pb.GitilesCommit(
                    host=host,
                    project=api.gerrit.project_from_remote_url(trigger.remote),
                    id=trigger.commit,
                    ref=f'refs/heads/{trigger.ref}',
                ),
            ),
        )
    if props.dry_run:
        with api.step.nest('dry-run, not launching builds'):
            links: list[tuple[str, str]] = []
            for req in bb_requests:
                bucket_builder: str = (
                    f'{req.builder.bucket}/{req.builder.builder}'
                )
                # (Previously the step presentation was captured in an unused
                # variable; only the step itself is needed.)
                api.step.empty(bucket_builder)
                links.append(
                    (
                        bucket_builder,
                        f'https://ci.chromium.org/ui/p/{req.builder.project}/'
                        f'builders/{bucket_builder}',
                    )
                )
            links_combined: str = ''.join(
                f'<br/>[{name}]({link})' for name, link in links
            )
            return result_pb.RawResult(
                summary_markdown=f'dry-run, would have launched: {links_combined}',
                status=common_pb.SUCCESS,
            )
    with api.step.nest('launch') as pres:
        links: list[tuple[str, str]] = []
        if bb_requests:
            builds: list[build_pb.Build] = api.buildbucket.schedule(bb_requests)
            for build in builds:
                bucket_builder: str = (
                    f'{build.builder.bucket}/{build.builder.builder}'
                )
                link: str = api.buildbucket.build_url(build_id=build.id)
                pres.links[bucket_builder] = link
                links.append((bucket_builder, link))
    links_combined: str = ''.join(
        f'<br/>[{name}]({link})' for name, link in links
    )
    return result_pb.RawResult(
        summary_markdown=f'launched: {links_combined}',
        status=common_pb.SUCCESS,
    )
def GenTests(api) -> Generator[recipe_test_api.TestData, None, None]:
    """Generate test cases for the bisector recipe."""

    def properties(
        *,
        included_buckets: Sequence[str] = (),
        excluded_buckets: Sequence[str] = (),
        dry_run: bool = False,
    ):
        # Build an InputProperties proto for a test case.
        props = InputProperties(dry_run=dry_run)
        props.included_buckets.extend(included_buckets)
        props.excluded_buckets.extend(excluded_buckets)
        return api.properties(props)

    def test(name, *args, **kwargs):
        # Every case runs as a CI build of the 'pigweed' project.
        return api.test(
            name,
            api.buildbucket.ci_build(project='pigweed'),
            *args,
            **kwargs,
        )

    def buildbucket_config(buckets: Sequence[bb_pb.Bucket]):
        # (Annotation fixed: this takes a sequence of buckets, not one.)
        cfg = bb_pb.BuildbucketCfg()
        cfg.buckets.extend(buckets)
        return cfg

    def bucket_config(
        name: str,
        builders: Sequence[bb_pb.BuilderConfig],
    ):
        cfg = bb_pb.Bucket(name=name)
        cfg.swarming.builders.extend(builders)
        return cfg

    def builder_config(
        name: str,
        remote='https://pigweed.googlesource.com/pigweed/pigweed',
    ):
        # Behavior flags are encoded in the builder name: 'norerun',
        # 'noremote', 'repo', and 'noprops' toggle the matching properties.
        props = {'do_not_rerun': 'norerun' in name}
        if remote:
            props['checkout_options'] = {
                'remote': None if 'noremote' in name else remote,
                'branch': 'main',
                'use_repo': 'repo' in name,
            }
        kwargs = {'name': name}
        if 'noprops' not in name:
            kwargs['properties'] = json.dumps(props)
        return bb_pb.BuilderConfig(**kwargs)

    def mock_buildbucket_config(
        *buckets_builders: Sequence[tuple[str, Sequence[str]]],
    ):
        # (Fixed: previously annotated with the undefined name 'List'.)
        buckets: list[bb_pb.Bucket] = []
        for bucket_name, builder_names in buckets_builders:
            builders: list[bb_pb.BuilderConfig] = []
            for builder in builder_names:
                builders.append(builder_config(builder))
            buckets.append(bucket_config(bucket_name, builders))
        return api.luci_config.mock_config(
            project='pigweed',
            config_name='cr-buildbucket.cfg',
            data=buildbucket_config(buckets),
        )

    def excluding_bucket(bucket, num):
        return api.post_process(
            post_process.MustRun,
            f'excluding {num} builders in bucket {bucket}',
        )

    def including_bucket(bucket):
        return api.post_process(post_process.MustRun, bucket)

    def excluding_builder(bucket, num):
        return api.post_process(
            post_process.MustRun,
            f'{bucket}.excluded {num}',
        )

    def including_builder(bucket, num):
        return api.post_process(
            post_process.MustRun,
            f'{bucket}.included {num}',
        )

    def build_status(
        *statuses: str,
        spacing: int,
        prefix: str = '',
    ):
        # Simulate a buildbucket search result: one build per status, with
        # triggering commits `spacing` apart. A 'status-N' entry pins that
        # build's commit to commit40(N) instead.
        step_name = None
        if prefix:
            step_name = f'{prefix}.buildbucket.search'
        builds = []
        for i, status in enumerate(statuses):
            commit = commit40(spacing * i)
            if '-' in status:
                commit = commit40(int(status.split('-')[1]))
                status = status.split('-')[0]
            build = getattr(api.builder_status, status)()
            build.input.gitiles_commit.id = commit
            builds.append(build)
        return api.buildbucket.simulated_search_results(
            builds,
            step_name=step_name,
        )

    def assert_do_not_rerun(prefix):
        return api.post_process(post_process.MustRun, f'{prefix}.do not rerun')

    def assert_no_properties(prefix):
        return api.post_process(post_process.MustRun, f'{prefix}.no properties')

    def assert_no_remote(prefix):
        return api.post_process(post_process.MustRun, f'{prefix}.no remote')

    def assert_uses_repo(prefix):
        return api.post_process(
            post_process.MustRun,
            f'{prefix}.repo-based, not bisecting',
        )

    def assert_skip_no_recent_passes(prefix):
        return api.post_process(
            post_process.MustRun,
            f'{prefix}.no recent passes',
        )

    def assert_too_many_infra_failures(prefix):
        return api.post_process(
            post_process.MustRun,
            f'{prefix}.too many infra failures',
        )

    def assert_no_need_to_bisect(prefix):
        return api.post_process(
            post_process.MustRun,
            f'{prefix}.no need to bisect',
        )

    def assert_already_attributed(prefix):
        return api.post_process(
            post_process.MustRun,
            f'{prefix}.already attributed',
        )

    def assert_picked(prefix, i):
        return api.post_process(post_process.MustRun, f'{prefix}.picked.{i}')

    def assert_already_scheduled(prefix):
        return api.post_process(
            post_process.MustRun,
            f'{prefix}.already scheduled',
        )

    def assert_scheduling(prefix):
        return api.post_process(post_process.MustRun, f'{prefix}.scheduling')

    def assert_buildbucket_scheduled():
        return api.post_process(
            post_process.MustRun,
            'launch.buildbucket.schedule',
        )

    def assert_nothing_to_launch():
        return api.post_process(
            post_process.MustRun,
            'nothing to launch',
        )

    def assert_dry_run():
        return api.post_process(
            post_process.MustRun,
            'dry-run, not launching builds',
        )

    def drop_expectations_must_be_last():
        # No need for expectation files, everything of note here is tested by
        # assertions. This must be the last thing added to the test.
        return api.post_process(post_process.DropExpectation)

    yield test(
        'default-ci-only',
        properties(),
        mock_buildbucket_config(('try', ('foo', 'bar', 'baz'))),
        excluding_bucket('try', 3),
        assert_nothing_to_launch(),
        drop_expectations_must_be_last(),
    )

    yield test(
        'ci-only',
        properties(included_buckets=('*.ci', 'ci')),
        mock_buildbucket_config(('try', ('foo', 'bar', 'baz'))),
        excluding_bucket('try', 3),
        assert_nothing_to_launch(),
        drop_expectations_must_be_last(),
    )

    yield test(
        'ignore-abc',
        properties(
            included_buckets=('*.ci', 'ci'),
            # Trailing comma is required: without it this is a plain string
            # and extend() would spread it into single-character patterns.
            excluded_buckets=('abc.*',),
        ),
        mock_buildbucket_config(('abc.ci', ('foo', 'bar', 'baz'))),
        excluding_bucket('abc.ci', 3),
        assert_nothing_to_launch(),
        drop_expectations_must_be_last(),
    )

    yield test(
        'donotrerun',
        properties(),
        mock_buildbucket_config(('abc.ci', ('foo-norerun', 'bar', 'baz'))),
        including_bucket('abc.ci'),
        including_builder('abc.ci', 2),
        excluding_builder('abc.ci', 1),
        assert_do_not_rerun('abc.ci.foo-norerun'),
        drop_expectations_must_be_last(),
    )

    yield test(
        'repo_noremote',
        mock_buildbucket_config(('abc.ci', ('noremote', 'repo', 'noprops'))),
        including_bucket('abc.ci'),
        assert_no_remote('abc.ci.noremote'),
        assert_no_properties('abc.ci.noprops'),
        assert_uses_repo('abc.ci.repo'),
        excluding_builder('abc.ci', 3),
        assert_nothing_to_launch(),
        drop_expectations_must_be_last(),
    )

    yield test(
        'no_recent_passes',
        properties(),
        mock_buildbucket_config(('abc.ci', ('foo',))),
        including_bucket('abc.ci'),
        including_builder('abc.ci', 1),
        build_status(
            'failure',
            'failure',
            'failure',
            prefix='abc.ci.foo',
            spacing=5,
        ),
        assert_skip_no_recent_passes('abc.ci.foo'),
        assert_nothing_to_launch(),
        drop_expectations_must_be_last(),
    )

    yield test(
        'recent_passes_gaps',
        mock_buildbucket_config(('abc.ci', ('foo', 'bar'))),
        including_bucket('abc.ci'),
        including_builder('abc.ci', 2),
        build_status(
            'failure',
            'failure',
            'passed',
            prefix='abc.ci.foo',
            spacing=5,
        ),
        build_status(
            'failure',
            'failure',
            'passed',
            prefix='abc.ci.bar',
            spacing=10,
        ),
        assert_picked('abc.ci.foo', 7),
        assert_scheduling('abc.ci.foo'),
        assert_picked('abc.ci.bar', 15),
        assert_scheduling('abc.ci.bar'),
        assert_buildbucket_scheduled(),
        drop_expectations_must_be_last(),
    )

    yield test(
        'recent_passes_already_scheduled',
        mock_buildbucket_config(('abc.ci', ('foo', 'bar'))),
        including_bucket('abc.ci'),
        including_builder('abc.ci', 2),
        build_status(
            'scheduled-7',
            'failure',
            'passed',
            prefix='abc.ci.foo',
            spacing=5,
        ),
        build_status(
            'running-15',
            'failure',
            'passed',
            prefix='abc.ci.bar',
            spacing=10,
        ),
        assert_picked('abc.ci.foo', 7),
        assert_already_scheduled('abc.ci.foo'),
        assert_picked('abc.ci.bar', 15),
        assert_already_scheduled('abc.ci.bar'),
        assert_nothing_to_launch(),
        drop_expectations_must_be_last(),
    )

    yield test(
        'recent_passes_nogaps',
        mock_buildbucket_config(('abc.ci', ('foo', 'bar'))),
        including_bucket('abc.ci'),
        including_builder('abc.ci', 2),
        build_status(
            'failure',
            'failure',
            'passed',
            prefix='abc.ci.foo',
            spacing=1,
        ),
        build_status(
            'failure',
            'failure',
            'passed',
            prefix='abc.ci.bar',
            spacing=1,
        ),
        assert_already_attributed('abc.ci.foo'),
        assert_already_attributed('abc.ci.bar'),
        assert_nothing_to_launch(),
        drop_expectations_must_be_last(),
    )

    yield test(
        'dry_run',
        properties(dry_run=True),
        mock_buildbucket_config(('abc.ci', ('foo', 'bar'))),
        including_bucket('abc.ci'),
        including_builder('abc.ci', 2),
        build_status(
            'failure',
            'failure',
            'passed',
            prefix='abc.ci.foo',
            spacing=40,
        ),
        build_status(
            'failure',
            'failure',
            'passed',
            prefix='abc.ci.bar',
            spacing=20,
        ),
        assert_picked('abc.ci.foo', 60),
        assert_scheduling('abc.ci.foo'),
        assert_picked('abc.ci.bar', 30),
        assert_scheduling('abc.ci.bar'),
        assert_dry_run(),
        drop_expectations_must_be_last(),
    )

    yield test(
        'infra_failures',
        mock_buildbucket_config(('abc.ci', ('foo', 'bar'))),
        including_bucket('abc.ci'),
        including_builder('abc.ci', 2),
        build_status(
            'infra_failure',
            'infra_failure',
            'infra_failure',
            'infra_failure',
            'infra_failure',
            'infra_failure',
            'passed',
            prefix='abc.ci.foo',
            spacing=5,
        ),
        build_status(
            'infra_failure',
            'infra_failure',
            'passed',
            prefix='abc.ci.bar',
            spacing=5,
        ),
        assert_too_many_infra_failures('abc.ci.foo'),
        assert_no_need_to_bisect('abc.ci.bar'),
        drop_expectations_must_be_last(),
    )