blob: d636bb6496c63a8a33dcfa1cc1d0fa9c56afab33 [file] [log] [blame]
# Copyright 2024 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""File bugs when builds start to fail.

If a builder was reliably passing and is now consistently failing, file a bug
for the oncall user to triage. Once the builder is reliably passing again,
close the bug.

In more detail, if a builder has passed at least once in the most recent 20
runs, but has been failing for the most recent 5 runs, file a bug about the
builder. Once it's been passing again for at least 3 runs, automatically close
the bug. (These counts are defaults and can be overridden with properties.)

Don't automatically close the bug immediately, to give people a chance to
update the bug with what they did to fix it and manually close it. But, if
there was a big problem affecting many builders, don't make people manually go
through and close/dupe many bugs.

After 30 days of a builder failing, forget about it. This means if a builder is
fixed after 31 days the associated bug will not be automatically closed. This
keeps the history saved in build output properties from growing unbounded.

There are many diagnostic steps in the output. These exist mainly for recipe
unit testing, but they could be helpful to people browsing the UI as well.
"""
from __future__ import annotations
import collections
import datetime
import fnmatch
import itertools
import json
from typing import TYPE_CHECKING
from PB.recipe_engine import result as result_pb
from PB.go.chromium.org.luci.buildbucket.proto import (
build as build_pb,
builds_service as builds_service_pb,
common as common_pb,
project_config as bb_pb,
)
from PB.recipes.pigweed.bug_filer import InputProperties
from PB.recipe_engine import result
from recipe_engine import post_process
if TYPE_CHECKING: # pragma: no cover
from typing import Generator, Sequence
from recipe_engine import recipe_api, recipe_test_api
# Recipe modules this recipe depends on. Each entry is reached through the
# `api` object below by its final path component (for example
# 'fuchsia/builder_state' is used as `api.builder_state`).
DEPS = [
    'fuchsia/builder_state',
    'fuchsia/builder_status',
    'fuchsia/issuetracker',
    'recipe_engine/buildbucket',
    'recipe_engine/luci_config',
    'recipe_engine/properties',
    'recipe_engine/step',
    'recipe_engine/time',
]
# The recipe's input properties are described by the InputProperties proto;
# RunSteps receives an instance of it as `props`.
PROPERTIES = InputProperties
def include_bucket(props: InputProperties, bucket: str) -> bool:
    """Decide whether builders in `bucket` should be examined.

    When neither `included_buckets` nor `excluded_buckets` is configured, the
    default patterns are written into `props` in place: CI and roll buckets
    are included and dev buckets are excluded. If only one of the two lists
    is configured, the other stays empty; in particular, configuring only
    exclusions means no bucket is included.

    Exclusion patterns take precedence over inclusion patterns. Patterns are
    shell-style globs matched with `fnmatch.fnmatch`.

    Args:
        props: Recipe input properties; may be mutated to hold the defaults.
        bucket: Name of the bucket to test.

    Returns:
        True if the bucket matches an inclusion pattern and no exclusion
        pattern, False otherwise.
    """
    nothing_configured = not (props.excluded_buckets or props.included_buckets)
    if nothing_configured:
        props.included_buckets.extend(('*.ci', 'ci', '*.roll', 'roll'))
        props.excluded_buckets.extend(('*.dev.*', 'dev.*'))

    # Exclusions win: check them before looking at inclusions.
    if any(fnmatch.fnmatch(bucket, pat) for pat in props.excluded_buckets):
        return False

    return any(fnmatch.fnmatch(bucket, pat) for pat in props.included_buckets)
def RunSteps(
    api: recipe_api.RecipeApi,
    props: InputProperties,
) -> result.RawResult | None:
    """File bugs for newly failing builders and close bugs for fixed ones.

    Iterates over every builder in every included bucket of the project's
    buildbucket config. For a builder with a bug recorded in the saved state,
    the bug is marked fixed (and forgotten) once the builder has no recent
    failures. For a builder without a recorded bug, a bug is filed if the
    builder was passing within the longer window but has not passed within
    the shorter, most recent window. Finally, state entries older than the
    purge timeout are dropped.

    Experimental builders are skipped entirely.

    Args:
        api: Recipe API providing the modules listed in DEPS.
        props: Input properties. Unset (falsy) thresholds are replaced in
            place with defaults: 20 / 5 / 3 builds, a 30-day purge timeout
            and a 10-day maximum build age. With `dry_run` set, no bugs are
            filed or closed, but steps are still emitted and state is still
            updated.

    Returns:
        A successful RawResult whose summary lists the bugs filed by this run.
    """
    props.was_recently_passing_builds = props.was_recently_passing_builds or 20
    props.is_currently_passing_builds = props.is_currently_passing_builds or 5
    props.is_currently_failing_builds = props.is_currently_failing_builds or 3
    props.purge_timeout_days = props.purge_timeout_days or 30
    props.max_age_days = props.max_age_days or 10

    # Some of the logic doesn't make sense unless these conditions hold.
    assert props.was_recently_passing_builds > props.is_currently_passing_builds
    assert props.was_recently_passing_builds > props.is_currently_failing_builds

    # Not all events will happen at the same time, but since we're looking at
    # differences of days or weeks we'll simplify the testing logic by using
    # the same time through the builder.
    now = api.time.time()

    builder_state = api.builder_state.fetch_previous_state()

    api.issuetracker.BUG_LINK_PREFIX = "https://pwbug.dev/"

    bb_cfg: bb_pb.BuildbucketCfg = api.luci_config.buildbucket()

    bugs_filed: list[str] = []

    for bucket in bb_cfg.buckets:
        with api.step.nest(bucket.name):
            if not include_bucket(props, bucket.name):
                api.step.empty('excluding')
                continue

            for builder in bucket.swarming.builders:
                with api.step.nest(builder.name):
                    if builder.experimental == bb_pb.YES:
                        api.step.empty('experimental, skipping')
                        # Actually skip: experimental builders must not be
                        # queried or have bugs filed against them.
                        continue

                    # Don't DoS buildbucket. (And there's no need for this
                    # builder to run quickly.)
                    api.time.sleep(0.1)

                    key = f'{bucket.name}/{builder.name}'

                    status = api.builder_status.retrieve(
                        bucket=bucket.name,
                        builder=builder.name,
                        n=props.was_recently_passing_builds,
                        include_incomplete=False,
                        assume_existence=True,
                        max_age=datetime.timedelta(days=props.max_age_days),
                    )

                    # A bug is already on file for this builder: either it is
                    # still failing (leave the bug alone) or it has recovered
                    # (close the bug and forget about it).
                    if key in builder_state:
                        api.step.empty('open bug')

                        recently_failed = (
                            api.builder_status.has_recently_failed(
                                status,
                                n=props.is_currently_failing_builds,
                            )
                        )

                        if recently_failed:
                            api.step.empty('recently failed')
                        else:
                            api.step.empty('no recent failures')
                            if props.dry_run:
                                api.step.empty('mark fixed')
                            else:
                                api.issuetracker.mark_issue_as_fixed(
                                    'mark fixed', builder_state[key]['bug_id']
                                )
                            del builder_state[key]

                        continue

                    api.step.empty('no open bug')

                    was_passing = api.builder_status.has_recently_passed(
                        status,
                        n=props.was_recently_passing_builds,
                    )
                    if was_passing:
                        api.step.empty('was passing')
                    else:
                        api.step.empty('was not passing')

                    is_passing = api.builder_status.has_recently_passed(
                        status,
                        n=props.is_currently_passing_builds,
                    )
                    if is_passing:
                        api.step.empty('is passing')
                    else:
                        api.step.empty('is not passing')

                    # Only file a bug for a builder that used to pass and has
                    # now stopped passing.
                    if not was_passing or is_passing:
                        api.step.empty('no bug to file')
                        continue

                    subject = f'{key} persistently failing'
                    desc = [
                        f'{key} persistently failing:',
                        api.buildbucket.builder_url(
                            bucket=bucket.name,
                            builder=builder.name,
                        ),
                        '',
                    ]

                    # List the builds up to (not including) the first
                    # successful one in the retrieved history.
                    for build in status.builds:
                        if build.status == common_pb.SUCCESS:
                            break
                        desc.append(
                            f'* {api.buildbucket.build_url(build_id=build.id)}'
                        )
                    desc.append('')

                    bug: api.issuetracker.Bug
                    if props.dry_run:
                        full_desc = '\n'.join(desc)
                        file_pres = api.step.empty('file bug').presentation
                        file_pres.step_summary_text = (
                            f'{subject}\n\n{full_desc}'
                        )
                        # Placeholder bug; the link uses the same host as
                        # BUG_LINK_PREFIX above.
                        bug = api.issuetracker.Bug(
                            id=123,
                            link='https://pwbug.dev/123',
                        )
                    else:
                        bug = api.issuetracker.file_bug(
                            'file bug',
                            subject,
                            '\n'.join(desc),
                            props.component_id,
                        )

                    builder_state[key] = {'bug_id': bug.id, 'timestamp': now}
                    bugs_filed.append(f'[{key}]({bug.link})')

                    # Save state immediately after we file the bug. Then, if
                    # the builder fails for some unrelated reason the bug id is
                    # still saved.
                    api.builder_state.save(builder_state)

    # Forget about bugs that were filed longer ago than the purge timeout so
    # the saved state does not grow without bound. Iterate over a copy since
    # entries are deleted during the loop.
    purge_timeout_seconds = props.purge_timeout_days * 24 * 60 * 60
    for key, entry in list(builder_state.items()):
        if now - entry['timestamp'] > purge_timeout_seconds:
            api.step.empty(f'obsolete {key}')
            del builder_state[key]

    api.builder_state.save(builder_state)

    return result.RawResult(
        summary_markdown=''.join(f'* {x}\n' for x in bugs_filed),
        status=common_pb.SUCCESS,
    )
# Timestamp, in seconds, used to seed the simulated clock in every test case
# (see the api.time.seed call in GenTests).
_START = 100000000
# Number of seconds in one day; tests use it to age saved state relative to
# _START.
_DAY = 24 * 60 * 60
def GenTests(api) -> Generator[recipe_test_api.TestData, None, None]:
    """Yield simulation test cases for the bug filer recipe.

    Every case is checked purely through post-process assertions on which
    diagnostic steps ran and on the saved 'state' output property, so each
    case ends by dropping its expectation file.
    """

    def test(name, *args, **kwargs):
        """Create a test case for a 'pigweed' CI build at a fixed time."""
        return api.test(
            name,
            api.buildbucket.ci_build(project='pigweed'),
            api.time.seed(_START),
            *args,
            **kwargs,
        )

    def properties(
        *,
        included_buckets: Sequence[str] = (),
        excluded_buckets: Sequence[str] = (),
        **kwargs,
    ):
        """Build input properties, filling the repeated bucket fields."""
        props = InputProperties(**kwargs)
        props.included_buckets.extend(included_buckets)
        props.excluded_buckets.extend(excluded_buckets)
        return api.properties(props)

    def buildbucket_config(buckets: Sequence[bb_pb.Bucket]):
        """Wrap bucket configs in a BuildbucketCfg message."""
        cfg = bb_pb.BuildbucketCfg()
        cfg.buckets.extend(buckets)
        return cfg

    def bucket_config(
        name: str,
        builders: Sequence[bb_pb.BuilderConfig],
    ):
        """Create a bucket config holding the given builders."""
        cfg = bb_pb.Bucket(name=name)
        cfg.swarming.builders.extend(builders)
        return cfg

    def builder_config(name: str):
        """Create a builder config.

        Builders with 'experimental' in their name are marked experimental.
        """
        kwargs = {}
        if 'experimental' in name:
            kwargs['experimental'] = bb_pb.YES
        return bb_pb.BuilderConfig(name=name, **kwargs)

    def mock_buildbucket_config(
        *buckets_builders: tuple[str, Sequence[str]],
    ):
        """Mock cr-buildbucket.cfg from (bucket name, builder names) pairs."""
        buckets: list[bb_pb.Bucket] = [
            bucket_config(
                bucket_name,
                [builder_config(builder) for builder in builder_names],
            )
            for bucket_name, builder_names in buckets_builders
        ]
        return api.luci_config.mock_config(
            project='pigweed',
            config_name='cr-buildbucket.cfg',
            data=buildbucket_config(buckets),
        )

    def builder_step_must_run(bucket: str, builder: str, step: str):
        """Assert that `step` ran nested under the bucket and builder."""
        return api.post_process(
            post_process.MustRun,
            f'{bucket}.{builder}.{step}',
        )

    def excluding_bucket(bucket):
        return api.post_process(post_process.MustRun, f'{bucket}.excluding')

    def including_bucket(bucket):
        return api.post_process(post_process.DoesNotRun, f'{bucket}.excluding')

    def experimental_skipping(bucket, builder):
        return builder_step_must_run(bucket, builder, 'experimental, skipping')

    def open_bug_exists(bucket, builder):
        return builder_step_must_run(bucket, builder, 'open bug')

    def no_open_bug_exists(bucket, builder):
        return builder_step_must_run(bucket, builder, 'no open bug')

    def build_status(bucket, builder, *statuses: Sequence[str]):
        """Simulate the builder's build history.

        Each positional argument is a sequence of names of build factories on
        the builder_status test API (e.g. 'passed', 'failure'); the sequences
        are concatenated in order.
        """
        return api.buildbucket.simulated_search_results(
            [
                getattr(api.builder_status, x)()
                for x in itertools.chain(*statuses)
            ],
            step_name=f'{bucket}.{builder}.buildbucket.search',
        )

    def recently_failed(bucket, builder):
        return builder_step_must_run(bucket, builder, 'recently failed')

    def no_recent_failures(bucket, builder):
        return builder_step_must_run(bucket, builder, 'no recent failures')

    def was_passing(bucket, builder):
        return builder_step_must_run(bucket, builder, 'was passing')

    def was_not_passing(bucket, builder):
        return builder_step_must_run(bucket, builder, 'was not passing')

    def is_passing(bucket, builder):
        return builder_step_must_run(bucket, builder, 'is passing')

    def is_not_passing(bucket, builder):
        return builder_step_must_run(bucket, builder, 'is not passing')

    def no_bug_to_file(bucket, builder):
        return builder_step_must_run(bucket, builder, 'no bug to file')

    def bug_filed(bucket, builder):
        return builder_step_must_run(bucket, builder, 'file bug')

    def marked_fixed(bucket, builder):
        return builder_step_must_run(bucket, builder, 'mark fixed')

    def obsolete(bucket, builder):
        # The purge step is emitted at the top level, not nested under the
        # bucket/builder steps.
        return api.post_process(
            post_process.MustRun,
            f'obsolete {bucket}/{builder}',
        )

    def output_state_contains(bucket, builder):
        return api.post_process(
            post_process.PropertyMatchesCallable,
            'state',
            lambda val: f'{bucket}/{builder}' in val,
        )

    def output_state_lacks(bucket, builder):
        return api.post_process(
            post_process.PropertyMatchesCallable,
            'state',
            lambda val: f'{bucket}/{builder}' not in val,
        )

    def drop_expectations_must_be_last():
        # No need for expectation files, everything of note here is tested by
        # assertions. This must be the last thing added to the test.
        return api.post_process(post_process.DropExpectation)

    bb_config_no_builders = mock_buildbucket_config(
        ('dev.ci', ()),
        ('dev.try', ()),
        ('ci', ()),
        ('try', ()),
    )

    yield test(
        'default-exclusions',
        bb_config_no_builders,
        excluding_bucket('dev.ci'),
        excluding_bucket('dev.try'),
        including_bucket('ci'),
        excluding_bucket('try'),
        drop_expectations_must_be_last(),
    )

    yield test(
        'no-dev-exclusion',
        properties(included_buckets=("*.ci", "ci")),
        bb_config_no_builders,
        including_bucket('dev.ci'),
        excluding_bucket('dev.try'),
        including_bucket('ci'),
        excluding_bucket('try'),
        drop_expectations_must_be_last(),
    )

    yield test(
        'exclude-experimental',
        mock_buildbucket_config(('abc.ci', ('foo-experimental',))),
        including_bucket('abc.ci'),
        experimental_skipping('abc.ci', 'foo-experimental'),
        drop_expectations_must_be_last(),
    )

    abc_bb_config = mock_buildbucket_config(('abc.ci', ('foo',)))

    yield test(
        'exclude-abc',
        properties(excluded_buckets=("abc.*",)),
        abc_bb_config,
        excluding_bucket('abc.ci'),
        drop_expectations_must_be_last(),
    )

    yield test(
        'passing',
        abc_bb_config,
        no_open_bug_exists('abc.ci', 'foo'),
        build_status('abc.ci', 'foo', ['passed'] * 20),
        was_passing('abc.ci', 'foo'),
        is_passing('abc.ci', 'foo'),
        no_bug_to_file('abc.ci', 'foo'),
        output_state_lacks('abc.ci', 'foo'),
        drop_expectations_must_be_last(),
    )

    yield test(
        'failing',
        abc_bb_config,
        no_open_bug_exists('abc.ci', 'foo'),
        build_status('abc.ci', 'foo', ['failure'] * 20),
        was_not_passing('abc.ci', 'foo'),
        is_not_passing('abc.ci', 'foo'),
        no_bug_to_file('abc.ci', 'foo'),
        output_state_lacks('abc.ci', 'foo'),
        drop_expectations_must_be_last(),
    )

    yield test(
        'flaky',
        abc_bb_config,
        no_open_bug_exists('abc.ci', 'foo'),
        build_status('abc.ci', 'foo', ['failure', 'passed'] * 10),
        was_passing('abc.ci', 'foo'),
        is_passing('abc.ci', 'foo'),
        no_bug_to_file('abc.ci', 'foo'),
        output_state_lacks('abc.ci', 'foo'),
        drop_expectations_must_be_last(),
    )

    yield test(
        'newly-failing',
        abc_bb_config,
        no_open_bug_exists('abc.ci', 'foo'),
        build_status('abc.ci', 'foo', ['failure'] * 5, ['passed'] * 15),
        was_passing('abc.ci', 'foo'),
        is_not_passing('abc.ci', 'foo'),
        bug_filed('abc.ci', 'foo'),
        output_state_contains('abc.ci', 'foo'),
        drop_expectations_must_be_last(),
    )

    yield test(
        'newly-failing-dryrun',
        properties(dry_run=True),
        abc_bb_config,
        no_open_bug_exists('abc.ci', 'foo'),
        build_status('abc.ci', 'foo', ['failure'] * 5, ['passed'] * 15),
        was_passing('abc.ci', 'foo'),
        is_not_passing('abc.ci', 'foo'),
        bug_filed('abc.ci', 'foo'),
        output_state_contains('abc.ci', 'foo'),
        drop_expectations_must_be_last(),
    )

    yield test(
        'bug-failing',
        api.builder_state(
            {'abc.ci/foo': {'timestamp': _START - _DAY, 'bug_id': 123}},
        ),
        abc_bb_config,
        open_bug_exists('abc.ci', 'foo'),
        build_status('abc.ci', 'foo', ['failure'] * 20),
        recently_failed('abc.ci', 'foo'),
        output_state_contains('abc.ci', 'foo'),
        drop_expectations_must_be_last(),
    )

    yield test(
        'bug-passing',
        api.builder_state(
            {'abc.ci/foo': {'timestamp': _START - _DAY, 'bug_id': 123}},
        ),
        abc_bb_config,
        open_bug_exists('abc.ci', 'foo'),
        build_status('abc.ci', 'foo', ['passed'] * 5, ['failure'] * 15),
        no_recent_failures('abc.ci', 'foo'),
        marked_fixed('abc.ci', 'foo'),
        output_state_lacks('abc.ci', 'foo'),
        drop_expectations_must_be_last(),
    )

    yield test(
        'bug-passing-dryrun',
        properties(dry_run=True),
        api.builder_state(
            {'abc.ci/foo': {'timestamp': _START - _DAY, 'bug_id': 123}},
        ),
        abc_bb_config,
        open_bug_exists('abc.ci', 'foo'),
        build_status('abc.ci', 'foo', ['passed'] * 5, ['failure'] * 15),
        no_recent_failures('abc.ci', 'foo'),
        marked_fixed('abc.ci', 'foo'),
        output_state_lacks('abc.ci', 'foo'),
        drop_expectations_must_be_last(),
    )

    yield test(
        'bug-failing-obsolete',
        api.builder_state(
            {'abc.ci/foo': {'timestamp': _START - 40 * _DAY, 'bug_id': 123}},
        ),
        abc_bb_config,
        open_bug_exists('abc.ci', 'foo'),
        build_status('abc.ci', 'foo', ['failure'] * 20),
        recently_failed('abc.ci', 'foo'),
        obsolete('abc.ci', 'foo'),
        output_state_lacks('abc.ci', 'foo'),
        drop_expectations_must_be_last(),
    )