# Copyright 2023 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Retrigger builds where the latest build failed.
If the most recent build of a builder failed, retrigger it. Exceptions:
* A build of the builder is currently scheduled or started (e.g., about to run,
or already running)
* No recent builds of the builder passed (e.g., this is a true failure and not
a flake)
This will allow other tooling to check the latest build in ci and evaluate
whether it's passing. There are three cases: it's passing, it's failing because
of a flake, and it's failing because it's broken. This should reduce the impact
of the second case because the builder will be retried several times, until
there's a passing build, or there are 10 failing builds in a row.
Tools should check to see if any of the most recent 10 builds passed, and if so
assume the builder is passing. If the builder is broken, the passing build will
be bumped from the 10 most recent builds before long.
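
For example, such a health check might boil down to a sketch like the
following (illustrative only, not part of this recipe; `builder_is_healthy`
and `recent_statuses` are placeholder names):

    def builder_is_healthy(recent_statuses: list[str]) -> bool:
        # Healthy as long as at least one of the last 10 builds passed.
        return 'SUCCESS' in recent_statuses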
"""
from __future__ import annotations
import collections
import fnmatch
import json
from typing import TYPE_CHECKING
from PB.recipe_engine import result as result_pb2
from PB.go.chromium.org.luci.buildbucket.proto import (
build as build_pb2,
builds_service as builds_service_pb2,
common as common_pb2,
project_config as bb_pb2,
)
from PB.go.chromium.org.luci.scheduler.appengine.messages import (
config as scheduler_pb2,
)
from PB.recipes.pigweed.rerunner import InputProperties
from recipe_engine import post_process
if TYPE_CHECKING: # pragma: no cover
from typing import Generator, Sequence, Tuple
from recipe_engine import recipe_api, recipe_test_api
DEPS = [
'fuchsia/builder_status',
'fuchsia/luci_config',
'recipe_engine/buildbucket',
'recipe_engine/properties',
'recipe_engine/scheduler',
'recipe_engine/step',
'recipe_engine/time',
]
PROPERTIES = InputProperties
def include_bucket(props: InputProperties, bucket: str) -> bool:
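    """Return True if builders in `bucket` should be considered for reruns.

    Bucket names are matched against the `excluded_buckets` and
    `included_buckets` glob patterns from the input properties, with
    exclusions taking precedence. If neither list is set, only the 'ci' and
    '*.ci' buckets are included.
    """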
if not props.excluded_buckets and not props.included_buckets:
props.included_buckets.append('*.ci')
props.included_buckets.append('ci')
for excluded_bucket in props.excluded_buckets:
if fnmatch.fnmatch(bucket, excluded_bucket):
return False
for included_bucket in props.included_buckets:
if fnmatch.fnmatch(bucket, included_bucket):
return True
return False
def RunSteps(api: recipe_api.RecipeApi, props: InputProperties):
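    """Retrigger failed CI builders that are likely just flaky.

    Walks every builder in the included buckets and schedules a new build for
    each builder whose latest build failed, that is not currently running, and
    that has passed recently.
    """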
bb_cfg: bb_pb2.BuildbucketCfg = api.luci_config.buildbucket()
builds_to_launch: list[tuple[str, str]] = []
for bucket in bb_cfg.buckets:
if not include_bucket(props, bucket.name):
api.step(
f'excluding {len(bucket.swarming.builders)} builders in '
f'bucket {bucket.name}',
None,
)
continue
with api.step.nest(bucket.name) as pres:
included = excluded = 0
for builder in bucket.swarming.builders:
num_builds: int = props.num_builds or 10
if builder.properties:
if json.loads(builder.properties).get('do_not_rerun'):
excluded += 1
continue
included += 1
with api.step.nest(builder.name):
# Don't DoS buildbucket. (And there's no need for this
# builder to run quickly.)
api.time.sleep(0.1)
status: BuilderStatus = api.builder_status.retrieve(
bucket=bucket.name, builder=builder.name, n=num_builds
)
# If the builder is currently running, don't start a new
# build.
if api.builder_status.is_incomplete(status):
api.step('is incomplete', None)
continue
# If the builder is currently passing, don't start a new
# build.
if api.builder_status.is_passing(status):
api.step('is passing', None)
continue
# If the builder hasn't recently passed, this probably isn't
# a flake and we should not start a new build.
if not api.builder_status.has_recently_passed(status):
api.step('no recent passes', None)
continue
api.step('scheduling', None)
builds_to_launch.append((bucket.name, builder.name))
pres.step_summary_text = f'included {included}, excluded {excluded}'
# These don't help users much but are useful for testing.
api.step.empty(f'included {included}')
api.step.empty(f'excluded {excluded}')
if not builds_to_launch:
api.step('nothing to launch', None)
return result_pb2.RawResult(
summary_markdown='nothing to launch',
status=common_pb2.SUCCESS,
)
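    # Decide how to launch each build: a builder that maps to exactly one
    # scheduler job is triggered through the LUCI Scheduler; everything else
    # is scheduled directly through Buildbucket.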
sched_cfg: scheduler_pb2.ProjectConfig = api.luci_config.scheduler()
jobs: dict[str, list[str]] = collections.defaultdict(list)
for job in sched_cfg.job:
bucket_builder = (job.buildbucket.bucket, job.buildbucket.builder)
jobs[bucket_builder].append(job.id)
bb_requests: list[builds_service_pb2.ScheduleBuildRequest] = []
sched_ids: list[str] = []
for bucket_builder in builds_to_launch:
if bucket_builder in jobs and len(jobs[bucket_builder]) == 1:
sched_ids.append(jobs[bucket_builder][0])
else:
bb_requests.append(
api.buildbucket.schedule_request(
bucket=bucket_builder[0],
builder=bucket_builder[1],
)
)
def scheduler_link(job_id):
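        """Return the LUCI Scheduler UI URL for the given job."""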
project: str = api.buildbucket.build.builder.project
return f'https://luci-scheduler.appspot.com/jobs/{project}/{job_id}'
if props.dry_run:
with api.step.nest('dry-run, not launching builds'):
links: list[tuple[str, str]] = [
(x, scheduler_link(x)) for x in sched_ids
]
for req in bb_requests:
bucket_builder: str = (
f'{req.builder.bucket}/{req.builder.builder}'
)
api.step(bucket_builder, None)
links.append(
(
bucket_builder,
f'https://ci.chromium.org/ui/p/{req.builder.project}/'
f'builders/{bucket_builder}',
)
)
links_combined: str = ''.join(
f'<br/>[{name}]({link})' for name, link in links
)
return result_pb2.RawResult(
summary_markdown=f'dry-run, would have launched: {links_combined}',
status=common_pb2.SUCCESS,
)
with api.step.nest('launch') as pres:
links: list[tuple[str, str]] = []
if sched_ids:
api.scheduler.emit_trigger(
api.scheduler.BuildbucketTrigger(),
api.buildbucket.build.builder.project,
sched_ids,
)
links.extend((x, scheduler_link(x)) for x in sched_ids)
if bb_requests:
builds: list[build_pb2.Build] = api.buildbucket.schedule(
bb_requests
)
for build in builds:
bucket_builder: str = (
f'{build.builder.bucket}/{build.builder.builder}'
)
link: str = api.buildbucket.build_url(build_id=build.id)
pres.links[bucket_builder] = link
links.append((bucket_builder, link))
links_combined: str = ''.join(
f'<br/>[{name}]({link})' for name, link in links
)
return result_pb2.RawResult(
summary_markdown=f'launched: {links_combined}',
status=common_pb2.SUCCESS,
)
def GenTests(api) -> Generator[recipe_test_api.TestData, None, None]:
def properties(
*,
included_buckets: Sequence[str] = (),
excluded_buckets: Sequence[str] = (),
dry_run: bool = False,
):
props = InputProperties(dry_run=dry_run)
props.included_buckets.extend(included_buckets)
props.excluded_buckets.extend(excluded_buckets)
return api.properties(props)
def test(name, *args, **kwargs):
return api.test(
name,
api.buildbucket.ci_build(project='pigweed'),
*args,
**kwargs,
)
    def buildbucket_config(buckets: Sequence[bb_pb2.Bucket]):
cfg = bb_pb2.BuildbucketCfg()
cfg.buckets.extend(buckets)
return cfg
def bucket_config(
name: str,
builders: Sequence[bb_pb2.BuilderConfig],
):
cfg = bb_pb2.Bucket(name=name)
cfg.swarming.builders.extend(builders)
return cfg
def builder_config(name: str):
return bb_pb2.BuilderConfig(
name=name,
properties=json.dumps({'do_not_rerun': 'norerun' in name}),
)
def mock_buildbucket_config(
        *buckets_builders: Tuple[str, Sequence[str]],
):
        buckets: list[bb_pb2.Bucket] = []
for bucket_name, builder_names in buckets_builders:
            builders: list[bb_pb2.BuilderConfig] = []
for builder in builder_names:
builders.append(builder_config(builder))
buckets.append(bucket_config(bucket_name, builders))
return api.luci_config.mock_config(
project='pigweed',
config_name='cr-buildbucket.cfg',
data=buildbucket_config(buckets),
)
def mock_scheduler_config(
        *buckets_builders: Tuple[str, Sequence[str]],
):
cfg = scheduler_pb2.ProjectConfig()
for bucket_name, builder_names in buckets_builders:
for builder_name in builder_names:
cfg.job.append(
scheduler_pb2.Job(
id=f'{bucket_name}-{builder_name}',
buildbucket=scheduler_pb2.BuildbucketTask(
bucket=bucket_name,
builder=builder_name,
),
)
)
return api.luci_config.mock_config(
project='pigweed',
config_name='luci-scheduler.cfg',
data=cfg,
)
def excluding_bucket(bucket, num):
return api.post_process(
post_process.MustRun,
f'excluding {num} builders in bucket {bucket}',
)
def including_bucket(bucket):
return api.post_process(post_process.MustRun, bucket)
def excluding_builder(bucket, num):
return api.post_process(
post_process.MustRun,
f'{bucket}.excluded {num}',
)
def including_builder(bucket, num):
return api.post_process(
post_process.MustRun,
f'{bucket}.included {num}',
)
def build_status(*statuses: str, prefix: str = ''):
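        """Mock the buildbucket search results for one builder.

        Each entry in `statuses` names a builder_status test API helper
        (e.g. 'passed', 'failure', 'scheduled', 'running') used to fabricate
        one build in the simulated search results.
        """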
step_name = None
if prefix:
step_name = f'{prefix}.buildbucket.search'
return api.buildbucket.simulated_search_results(
[getattr(api.builder_status, x)() for x in statuses],
step_name=step_name,
)
def assert_skip_is_incomplete(prefix):
return api.post_process(post_process.MustRun, f'{prefix}.is incomplete')
def assert_skip_is_passing(prefix):
return api.post_process(post_process.MustRun, f'{prefix}.is passing')
def assert_skip_no_recent_passes(prefix):
return api.post_process(
post_process.MustRun,
f'{prefix}.no recent passes',
)
def assert_scheduling(prefix):
return api.post_process(post_process.MustRun, f'{prefix}.scheduling')
def assert_buildbucket_scheduled():
return api.post_process(
post_process.MustRun,
            'launch.buildbucket.schedule',
)
def assert_scheduler_triggered():
return api.post_process(
post_process.MustRun,
'launch.luci-scheduler.EmitTriggers',
)
def assert_nothing_to_launch():
return api.post_process(
post_process.MustRun,
            'nothing to launch',
)
def assert_dry_run():
return api.post_process(
post_process.MustRun,
            'dry-run, not launching builds',
)
def drop_expectations_must_be_last():
# No need for expectation files, everything of note here is tested by
# assertions. This must be the last thing added to the test.
return api.post_process(post_process.DropExpectation)
yield test(
'default-ci-only',
mock_buildbucket_config(('try', ('foo', 'bar', 'baz'))),
excluding_bucket('try', 3),
assert_nothing_to_launch(),
drop_expectations_must_be_last(),
)
yield test(
'ci-only',
properties(included_buckets=("*.ci", "ci")),
mock_buildbucket_config(('try', ('foo', 'bar', 'baz'))),
excluding_bucket('try', 3),
assert_nothing_to_launch(),
drop_expectations_must_be_last(),
)
yield test(
'ignore-abc',
properties(
included_buckets=("*.ci", "ci"),
excluded_buckets=("abc.*"),
),
mock_buildbucket_config(('abc.ci', ('foo', 'bar', 'baz'))),
excluding_bucket('abc.ci', 3),
assert_nothing_to_launch(),
drop_expectations_must_be_last(),
)
yield test(
'donotrerun',
mock_buildbucket_config(('abc.ci', ('foo-norerun', 'bar', 'baz'))),
including_bucket('abc.ci'),
including_builder('abc.ci', 2),
excluding_builder('abc.ci', 1),
drop_expectations_must_be_last(),
)
yield test(
'scheduled',
mock_buildbucket_config(('abc.ci', ('foo',))),
including_bucket('abc.ci'),
including_builder('abc.ci', 1),
build_status('scheduled', prefix='abc.ci.foo'),
assert_skip_is_incomplete('abc.ci.foo'),
assert_nothing_to_launch(),
drop_expectations_must_be_last(),
)
yield test(
'running',
mock_buildbucket_config(('abc.ci', ('foo',))),
including_bucket('abc.ci'),
including_builder('abc.ci', 1),
build_status('running', prefix='abc.ci.foo'),
assert_skip_is_incomplete('abc.ci.foo'),
assert_nothing_to_launch(),
drop_expectations_must_be_last(),
)
yield test(
'passed',
mock_buildbucket_config(('abc.ci', ('foo',))),
including_bucket('abc.ci'),
including_builder('abc.ci', 1),
build_status('passed', prefix='abc.ci.foo'),
assert_skip_is_passing('abc.ci.foo'),
assert_nothing_to_launch(),
drop_expectations_must_be_last(),
)
yield test(
'no_recent_passes',
mock_buildbucket_config(('abc.ci', ('foo',))),
including_bucket('abc.ci'),
including_builder('abc.ci', 1),
build_status('failure', 'failure', 'failure', prefix='abc.ci.foo'),
assert_skip_no_recent_passes('abc.ci.foo'),
assert_nothing_to_launch(),
drop_expectations_must_be_last(),
)
yield test(
'recent_passes',
mock_buildbucket_config(('abc.ci', ('foo', 'bar'))),
mock_scheduler_config(('abc.ci', ('foo',))),
including_bucket('abc.ci'),
including_builder('abc.ci', 2),
build_status('failure', 'failure', 'passed', prefix='abc.ci.foo'),
build_status('failure', 'failure', 'passed', prefix='abc.ci.bar'),
assert_scheduling('abc.ci.foo'),
assert_scheduling('abc.ci.bar'),
assert_buildbucket_scheduled(),
assert_scheduler_triggered(),
drop_expectations_must_be_last(),
)
yield test(
'dry_run',
properties(dry_run=True),
mock_buildbucket_config(('abc.ci', ('foo', 'bar'))),
mock_scheduler_config(('abc.ci', ('foo',))),
including_bucket('abc.ci'),
including_builder('abc.ci', 2),
build_status('failure', 'failure', 'passed', prefix='abc.ci.foo'),
build_status('failure', 'failure', 'passed', prefix='abc.ci.bar'),
assert_scheduling('abc.ci.foo'),
assert_scheduling('abc.ci.bar'),
assert_dry_run(),
drop_expectations_must_be_last(),
)