blob: 7f8bdc0c711e7e7e696af756808cf2a2ff17c494 [file] [log] [blame] [edit]
#!/usr/bin/env python3
#
# Copyright 2022 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=invalid-name
# pylint: disable=line-too-long
# pylint: disable=missing-function-docstring
"""A script to perform BCR validations for Bazel modules
Validations performed are:
- Verify that the versions in metadata.json match the existing versions
- Verify that the source archive URL matches the source repositories
- Verify that the source archive URL is stable
- Verify that the presubmit.yml file matches the previous version
  - If not, we should require BCR maintainer review.
- Verify that the checked-in MODULE.bazel file matches the one in the extracted and patched source tree.
"""
import argparse
import json
import subprocess
from pathlib import Path
import shutil
import sys
import tempfile
import os
import yaml
from enum import Enum
from difflib import unified_diff
from urllib.parse import urlparse
from registry import RegistryClient
from registry import Version
from registry import download
from registry import download_file
from registry import integrity
from registry import read
from verify_stable_archives import UrlStability
from verify_stable_archives import verify_stable_archive
class BcrValidationResult(Enum):
    """Outcome category attached to every reported validation message."""

    # The check passed.
    GOOD = 1
    # The check did not fail, but a BCR maintainer has to review the change.
    NEED_BCR_MAINTAINER_REVIEW = 2
    # The check failed.
    FAILED = 3
# ANSI escape sequences used to colorize the result label printed by
# BcrValidator.report.
RED = "\x1b[31m"
GREEN = "\x1b[32m"
YELLOW = "\x1b[33m"
# Emitted right after the colored label to end the colored span.
RESET = "\x1b[0m"
# Maps each validation result to the color its label is printed in.
COLOR = {
    BcrValidationResult.GOOD: GREEN,
    BcrValidationResult.NEED_BCR_MAINTAINER_REVIEW: YELLOW,
    BcrValidationResult.FAILED: RED,
}
def print_collapsed_group(name):
    """Print a `--- <name>` group header surrounded by blank lines."""
    header = f"\n\n--- {name}\n\n"
    print(header)
def print_expanded_group(name):
    """Print a `+++ <name>` group header surrounded by blank lines."""
    header = f"\n\n+++ {name}\n\n"
    print(header)
def parse_module_versions(registry, check_all, inputs):
    """Parse module versions to be validated from input.

    Args:
      registry: Object providing `get_all_module_versions()` and
        `get_module_versions(name)`.
      check_all: When true, `inputs` is ignored and whatever
        `registry.get_all_module_versions()` returns is returned.
      inputs: Iterable of `name@version` or bare `name` strings; may be
        None or empty.

    Returns:
      A list with one `(name, version)` tuple per `name@version` input,
      extended by `registry.get_module_versions(name)` for every bare name.
    """
    if check_all:
        return registry.get_all_module_versions()
    if not inputs:
        return []
    result = []
    for spec in inputs:
        # Split on the first "@" only: a spec with a stray second "@" then
        # yields a (name, version) pair that later fails validation, instead
        # of blowing up here with an unpacking ValueError.
        name, separator, version = spec.partition("@")
        if separator:
            result.append((name, version))
        else:
            result.extend(registry.get_module_versions(spec))
    return result
def apply_patch(work_dir, patch_strip, patch_file):
    """Apply `patch_file` inside `work_dir` with the external `patch` tool.

    `patch_strip` becomes the `-p<N>` argument. Because the command runs
    with `check=True`, a non-zero exit raises subprocess.CalledProcessError.
    """
    # Requires patch to be installed
    command = ["patch", "-p%d" % patch_strip, "-f", "-l", "-i", patch_file]
    subprocess.run(command, cwd=work_dir, env=os.environ, check=True, shell=False)
def fix_line_endings(lines):
    """Return `lines` with trailing whitespace stripped and a single "\\n" appended to each."""
    normalized = []
    for raw_line in lines:
        normalized.append(f"{raw_line.rstrip()}\n")
    return normalized
class BcrValidationException(Exception):
    """Raised whenever we should stop the validation immediately."""
class BcrValidator:
    """Run BCR validations against a registry and collect their results.

    Every check reports its outcome through `report`, which prints it and
    appends it to `validation_results`. `getValidationReturnCode` folds the
    collected results into a process exit code.
    """

    def __init__(self, registry, should_fix):
        """Create a validator.

        Args:
          registry: Registry accessor; the checks call methods such as
            `contains`, `get_metadata`, `get_source` and the path getters on it.
          should_fix: Whether the validator should try to fix the detected error.
        """
        # (BcrValidationResult, message) tuples, in the order they were reported.
        self.validation_results = []
        self.registry = registry
        # Whether the validator should try to fix the detected error.
        self.should_fix = should_fix

    @staticmethod
    def _read_lines(path):
        """Return the lines of the text file at `path`, closing the file afterwards."""
        with open(path, "r") as f:
            return f.readlines()

    def report(self, type, message):  # pylint: disable=redefined-builtin
        """Print `message` with a colorized result label and record it.

        Args:
          type: The BcrValidationResult of the check; also the key into COLOR.
          message: Human readable description of the outcome.
        """
        color = COLOR[type]
        print(f"{color}{type}{RESET}: {message}\n")
        self.validation_results.append((type, message))

    def verify_module_existence(self, module_name, version):
        """Verify the directory exists and the version is recorded in metadata.json.

        Raises:
          BcrValidationException: if the registry does not contain the module
            version at all; no further check can run in that case.
        """
        if not self.registry.contains(module_name, version):
            self.report(BcrValidationResult.FAILED, f"{module_name}@{version} doesn't exist.")
            raise BcrValidationException("The module to be validated doesn't exist!")
        versions = self.registry.get_metadata(module_name)["versions"]
        if version not in versions:
            self.report(
                BcrValidationResult.FAILED, f"Version {version} is not recorded in {module_name}'s metadata.json file."
            )
        else:
            self.report(BcrValidationResult.GOOD, "The module exists and is recorded in metadata.json.")

    def verify_source_archive_url_match_github_repo(self, module_name, version):
        """Verify the source archive URL matches the github repo. For now, we only support github repositories check.

        Repository entries of the form `github:<path>` and `https://...` are
        understood; entries of any other form never match. A module without
        any `repository` entry in its metadata passes.
        """
        source_url = self.registry.get_source(module_name, version)["url"]
        source_repositories = self.registry.get_metadata(module_name).get("repository", [])
        parts = urlparse(source_url)
        # abspath() normalizes the URL path (collapsing ".." segments) before
        # the prefix comparison below.
        source_path = os.path.abspath(parts.path)
        matched = not source_repositories
        for source_repository in source_repositories:
            if matched:
                break
            # partition() instead of split(":"): an entry with more than one
            # colon (e.g. an https URL with a port) or with none at all must
            # not raise an unpacking ValueError.
            repo_type, _, repo_path = source_repository.partition(":")
            if repo_type == "github":
                matched = (
                    parts.scheme == "https"
                    and parts.netloc == "github.com"
                    and source_path.startswith(f"/{repo_path}/")
                )
            elif repo_type == "https":
                repo = urlparse(source_repository)
                matched = (
                    parts.scheme == repo.scheme
                    and parts.netloc == repo.netloc
                    and source_path.startswith(f"{repo.path}/")
                )
        if not matched:
            self.report(
                BcrValidationResult.FAILED,
                f"The source URL of {module_name}@{version} ({source_url}) doesn't match any of the module's source repositories {source_repositories}.",
            )
        else:
            self.report(BcrValidationResult.GOOD, "The source URL matches one of the source repositories.")

    def verify_source_archive_url_stability(self, module_name, version):
        """Verify source archive URL is stable"""
        source_url = self.registry.get_source(module_name, version)["url"]
        if verify_stable_archive(source_url) == UrlStability.UNSTABLE:
            self.report(
                BcrValidationResult.FAILED,
                f"{module_name}@{version} is using an unstable source url: `{source_url}`.\n"
                + "You should use a release archive URL in the format of "
                + "`https://github.com/<ORGANIZATION>/<REPO>/releases/download/<version>/<name>.tar.gz` "
                + "to ensure the archive checksum stability.\n"
                + "See https://blog.bazel.build/2023/02/15/github-archive-checksum.html for more context.",
            )
        else:
            self.report(BcrValidationResult.GOOD, "The source URL doesn't look unstable.")

    def verify_source_archive_url_integrity(self, module_name, version):
        """Verify the integrity value of the URL is correct."""
        source = self.registry.get_source(module_name, version)
        source_url = source["url"]
        expected_integrity = source["integrity"]
        # The expected value has the form "<algorithm>-<digest>"; hash the
        # download with the same algorithm so the two strings are comparable.
        algorithm, _ = expected_integrity.split("-", 1)
        real_integrity = integrity(download(source_url), algorithm)
        if real_integrity != expected_integrity:
            self.report(
                BcrValidationResult.FAILED,
                f"{module_name}@{version}'s source archive `{source_url}` has expected integrity value "
                f"`{expected_integrity}`, but the real integrity value is `{real_integrity}`!",
            )
        else:
            self.report(BcrValidationResult.GOOD, "The source archive's integrity value matches.")

    def verify_presubmit_yml_change(self, module_name, version):
        """Verify if the presubmit.yml is the same as the previous version.

        The "previous version" is the entry right before `version` once the
        versions recorded in metadata.json are ordered with `Version` as key.
        """
        # sorted() works on a copy, so the list handed out by the registry is
        # never reordered behind the caller's back.
        versions = sorted(self.registry.get_metadata(module_name)["versions"], key=Version)
        try:
            index = versions.index(version)
        except ValueError:
            # verify_module_existence reports this case without stopping the
            # validation, so it has to be handled here instead of crashing.
            self.report(
                BcrValidationResult.FAILED,
                f"Version {version} is not recorded in {module_name}'s metadata.json file, "
                "so its presubmit.yml file cannot be compared with a previous version.",
            )
            return
        if index == 0:
            self.report(
                BcrValidationResult.NEED_BCR_MAINTAINER_REVIEW,
                f"Module version {module_name}@{version} is new, the presubmit.yml file "
                "should be reviewed by a BCR maintainer.",
            )
            return
        pre_version = versions[index - 1]
        previous_presubmit_yml = self.registry.get_presubmit_yml_path(module_name, pre_version)
        previous_presubmit_content = self._read_lines(previous_presubmit_yml)
        current_presubmit_yml = self.registry.get_presubmit_yml_path(module_name, version)
        current_presubmit_content = self._read_lines(current_presubmit_yml)
        diff = list(
            unified_diff(
                previous_presubmit_content,
                current_presubmit_content,
                fromfile=str(previous_presubmit_yml),
                tofile=str(current_presubmit_yml),
            )
        )
        if diff:
            self.report(
                BcrValidationResult.NEED_BCR_MAINTAINER_REVIEW,
                f"The presubmit.yml file of {module_name}@{version} doesn't match its previous version "
                f"{module_name}@{pre_version}, the following presubmit.yml file change "
                "should be reviewed by a BCR maintainer.\n " + " ".join(diff),
            )
        else:
            self.report(BcrValidationResult.GOOD, "The presubmit.yml file matches the previous version.")

    def add_module_dot_bazel_patch(self, diff, module_name, version):
        """Adding a patch file for MODULE.bazel according to the diff result.

        Writes `diff` to `module_dot_bazel.patch`, then records that patch and
        its integrity value (plus an integer `patch_strip`) in the module
        version's source.json.
        """
        source = self.registry.get_source(module_name, version)
        patch_file = self.registry.get_patch_file_path(module_name, version, "module_dot_bazel.patch")
        patch_file.parent.mkdir(parents=True, exist_ok=True)
        # Close the file before hashing it below, so the integrity value is
        # computed over the fully flushed content.
        with open(patch_file, "w") as f:
            f.writelines(diff)
        source["patch_strip"] = int(source.get("patch_strip", 0))
        patches = source.get("patches", {})
        patches["module_dot_bazel.patch"] = integrity(read(patch_file))
        source["patches"] = patches
        source_json_content = json.dumps(source, indent=4) + "\n"
        self.registry.get_source_json_path(module_name, version).write_text(source_json_content)

    def _apply_patches(self, source, source_root, module_name, version):
        """Check the integrity of every patch listed in `source` and apply it to `source_root`."""
        if "patches" not in source:
            return
        # Same default as the other users of patch_strip in this class.
        patch_strip = int(source.get("patch_strip", 0))
        for patch_name, expected_integrity in source["patches"].items():
            patch_file = self.registry.get_patch_file_path(module_name, version, patch_name)
            actual_integrity = integrity(read(patch_file))
            if actual_integrity != expected_integrity:
                self.report(
                    BcrValidationResult.FAILED,
                    f"The patch file `{patch_file}` has expected integrity value `{expected_integrity}`, "
                    f"but the real integrity value is `{actual_integrity}`.",
                )
            # The patch is applied even after an integrity mismatch, so the
            # MODULE.bazel comparison still runs on the patched tree.
            apply_patch(source_root, patch_strip, str(patch_file.resolve()))

    def _apply_overlay(self, source, source_root, module_name, version):
        """Check every overlay file listed in `source` and copy the valid ones into `source_root`."""
        if "overlay" not in source:
            return
        overlay_dir = self.registry.get_overlay_dir(module_name, version)
        # Resolve the root as well: the destination is resolved before the
        # containment check, and comparing it against an unresolved root
        # rejects every overlay when the temp dir sits behind a symlink.
        resolved_root = source_root.resolve()
        for overlay_file, expected_integrity in source["overlay"].items():
            overlay_src = overlay_dir / overlay_file
            overlay_dst = source_root / overlay_file
            try:
                overlay_dst.resolve().relative_to(resolved_root)
            except ValueError:
                self.report(
                    BcrValidationResult.FAILED,
                    f"The overlay file path `{overlay_file}` must point inside the source archive.",
                )
                continue
            try:
                actual_integrity = integrity(read(overlay_src))
            except FileNotFoundError:
                self.report(BcrValidationResult.FAILED, f"The overlay file `{overlay_file}` does not exist")
                continue
            if actual_integrity != expected_integrity:
                self.report(
                    BcrValidationResult.FAILED,
                    f"The overlay file `{overlay_file}` has expected integrity value `{expected_integrity}`, "
                    f"but the real integrity value is `{actual_integrity}`.",
                )
                continue
            overlay_dst.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(overlay_src, overlay_dst)

    def verify_module_dot_bazel(self, module_name, version):
        """Verify the checked in MODULE.bazel matches the one in the extracted and patched sources.

        Downloads and unpacks the source archive into a temporary directory,
        applies the patches and overlay files, and diffs the resulting
        MODULE.bazel against the checked in one. When they differ and
        `should_fix` is set, the diff is added to the module as a patch.
        """
        source = self.registry.get_source(module_name, version)
        source_url = source["url"]
        # The context manager removes the directory even when the download,
        # the unpacking or a patch raises.
        with tempfile.TemporaryDirectory() as tmp:
            tmp_dir = Path(tmp)
            archive_file = tmp_dir.joinpath(source_url.split("/")[-1].split("?")[0])
            output_dir = tmp_dir.joinpath("source_root")
            download_file(source_url, archive_file)
            shutil.unpack_archive(str(archive_file), output_dir)

            # Apply patch files if there are any, also verify their integrity values
            source_root = output_dir.joinpath(source.get("strip_prefix") or "")
            self._apply_patches(source, source_root, module_name, version)
            self._apply_overlay(source, source_root, module_name, version)

            source_module_dot_bazel = source_root.joinpath("MODULE.bazel")
            if source_module_dot_bazel.exists():
                source_module_dot_bazel_content = self._read_lines(source_module_dot_bazel)
            else:
                # No MODULE.bazel in the sources: diff against an empty file.
                source_module_dot_bazel_content = []
            bcr_module_dot_bazel_content = self._read_lines(
                self.registry.get_module_dot_bazel_path(module_name, version)
            )
            source_module_dot_bazel_content = fix_line_endings(source_module_dot_bazel_content)
            bcr_module_dot_bazel_content = fix_line_endings(bcr_module_dot_bazel_content)
            # One "a/" prefix per stripped path component, so the generated
            # patch applies with the module's own patch_strip value.
            file_name = "a/" * int(source.get("patch_strip", 0)) + "MODULE.bazel"
            diff = list(
                unified_diff(
                    source_module_dot_bazel_content, bcr_module_dot_bazel_content, fromfile=file_name, tofile=file_name
                )
            )
            if diff:
                self.report(
                    BcrValidationResult.FAILED,
                    "Checked in MODULE.bazel file doesn't match the one in the extracted and patched sources.\n"
                    + f"Please fix the MODULE.bazel file or you can add the following patch to {module_name}@{version}:\n"
                    + " "
                    + " ".join(diff),
                )
                if self.should_fix:
                    self.add_module_dot_bazel_patch(diff, module_name, version)
            else:
                self.report(BcrValidationResult.GOOD, "Checked in MODULE.bazel matches the sources.")

    def check_if_bazel_version_is_set(self, tasks):
        """Report a failure for every task in `tasks` whose config has no `bazel` key."""
        for task_name, task_config in tasks.items():
            # A task declared without any body is loaded as None; treat it
            # like a config that lacks the key instead of raising TypeError.
            if not task_config or "bazel" not in task_config:
                self.report(
                    BcrValidationResult.FAILED,
                    f"Missing bazel version for task '{task_name}' in the presubmit.yml file.",
                )

    def validate_presubmit_yml(self, module_name, version):
        """Validate that presubmit.yml declares at least one task and a bazel version for every task."""
        presubmit_yml = self.registry.get_presubmit_yml_path(module_name, version)
        with open(presubmit_yml, "r") as f:
            # safe_load gives None for an empty document; fall back to an
            # empty mapping so that case is reported as "no task" below.
            presubmit = yaml.safe_load(f) or {}
        report_num_old = len(self.validation_results)
        tasks = presubmit.get("tasks") or {}
        self.check_if_bazel_version_is_set(tasks)
        test_module_tasks = (presubmit.get("bcr_test_module") or {}).get("tasks") or {}
        self.check_if_bazel_version_is_set(test_module_tasks)
        if not tasks and not test_module_tasks:
            self.report(BcrValidationResult.FAILED, "At least one task should be specified in the presubmit.yml file.")
        # Nothing reported since the snapshot above means every check passed.
        report_num_new = len(self.validation_results)
        if report_num_new == report_num_old:
            self.report(BcrValidationResult.GOOD, "The presubmit.yml file is valid.")

    def verify_module_name_conflict(self):
        """Verify no module name conflict when ignoring case sensitivity."""
        module_group = {}
        for name in self.registry.get_all_modules():
            module_group.setdefault(name.lower(), []).append(name)
        conflicts = [modules for modules in module_group.values() if len(modules) > 1]
        for modules in conflicts:
            self.report(BcrValidationResult.FAILED, f"Module name conflict found: {', '.join(modules)}")
        if not conflicts:
            self.report(BcrValidationResult.GOOD, "No module name conflict found.")

    def validate_module(self, module_name, version, skipped_validations):
        """Run all per-module checks for `module_name`@`version`.

        Args:
          module_name: Name of the module to validate.
          version: Version of the module to validate.
          skipped_validations: Container of step names to bypass; the names
            checked here are "source_repo", "url_stability" and "presubmit_yml".

        Raises:
          BcrValidationException: propagated from verify_module_existence.
        """
        print_expanded_group(f"Validating {module_name}@{version}")
        self.verify_module_name_conflict()
        self.verify_module_existence(module_name, version)
        if "source_repo" not in skipped_validations:
            self.verify_source_archive_url_match_github_repo(module_name, version)
        if "url_stability" not in skipped_validations:
            self.verify_source_archive_url_stability(module_name, version)
        self.verify_source_archive_url_integrity(module_name, version)
        if "presubmit_yml" not in skipped_validations:
            self.verify_presubmit_yml_change(module_name, version)
        self.validate_presubmit_yml(module_name, version)
        self.verify_module_dot_bazel(module_name, version)

    def validate_all_metadata(self):
        """Validate every module's metadata.json file.

        Each file must load as JSON, list its versions in sorted order, and
        only list versions that exist in the registry.
        """
        print_expanded_group("Validating all metadata.json files")
        has_error = False
        for module_name in self.registry.get_all_modules():
            try:
                metadata = self.registry.get_metadata(module_name)
            except json.JSONDecodeError as e:
                self.report(BcrValidationResult.FAILED, f"Failed to load {module_name}'s metadata.json file: " + str(e))
                has_error = True
                continue
            sorted_versions = sorted(metadata["versions"], key=Version)
            if sorted_versions != metadata["versions"]:
                self.report(
                    BcrValidationResult.FAILED,
                    f"{module_name}'s metadata.json file is not sorted by version.\n "
                    f"Sorted versions: {sorted_versions}.\n "
                    f"Original versions: {metadata['versions']}",
                )
                has_error = True
            for version in metadata["versions"]:
                if not self.registry.contains(module_name, version):
                    self.report(
                        BcrValidationResult.FAILED,
                        f"{module_name}@{version} doesn't exist, "
                        f"but it's recorded in {module_name}'s metadata.json file.",
                    )
                    has_error = True
        if not has_error:
            self.report(BcrValidationResult.GOOD, "All metadata.json files are valid.")

    def getValidationReturnCode(self):
        """Return the exit code summarizing every result reported so far."""
        # Calculate the overall return code
        # 0: All good
        # 1: BCR validation failed
        # 42: BCR validation passes, but some changes need BCR maintainer review before triggering follow up BCR presubmit jobs.
        result_codes = {code for code, _ in self.validation_results}
        if BcrValidationResult.FAILED in result_codes:
            return 1
        if BcrValidationResult.NEED_BCR_MAINTAINER_REVIEW in result_codes:
            # Use a special return code to avoid conflict with other error code
            return 42
        return 0
def main(argv=None):
    """Parse the command line, run the requested validations and return the exit code.

    Args:
      argv: Command line arguments without the program name; defaults to
        `sys.argv[1:]` when None.

    Returns:
      -1 (after printing the help text) when none of --check, --check_all and
      --check_all_metadata is given, otherwise the validator's return code.
    """
    cli_args = sys.argv[1:] if argv is None else argv

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--registry",
        type=str,
        default=".",
        help="Specify the root path of the registry (default: the current working directory).",
    )
    parser.add_argument(
        "--check",
        type=str,
        action="append",
        help=(
            "Specify a Bazel module version you want to perform the BCR check on."
            " (e.g. bazel_skylib@1.3.0). If no version is specified, all versions of that module are checked."
            " This flag can be repeated to accept multiple module versions."
        ),
    )
    parser.add_argument(
        "--check_all",
        action="store_true",
        help="Check all Bazel modules in the registry, ignore other --check flags.",
    )
    parser.add_argument(
        "--check_all_metadata",
        action="store_true",
        help="Check all Bazel module metadata in the registry.",
    )
    parser.add_argument(
        "--fix",
        action="store_true",
        help="Should the script try to fix the detected validation errors.",
    )
    parser.add_argument(
        "--skip_validation",
        type=str,
        default=[],
        action="append",
        help=(
            'Bypass the given step for validating modules. Supported values are: "url_stability", '
            'to bypass the URL stability check; "presubmit_yml", to bypass the presubmit.yml check; '
            '"source_repo", to bypass the source repo verification; '
            "This flag can be repeated to skip multiple validations."
        ),
    )
    args = parser.parse_args(cli_args)

    # Without any of the three check flags there is nothing to do.
    anything_requested = args.check_all or args.check or args.check_all_metadata
    if not anything_requested:
        parser.print_help()
        return -1

    registry = RegistryClient(args.registry)

    # Parse what module versions we should validate
    module_versions = parse_module_versions(registry, args.check_all, args.check)
    if module_versions:
        print_expanded_group("Module versions to be validated:")
        for name, version in module_versions:
            print(f"{name}@{version}")

    # Validate given module version.
    validator = BcrValidator(registry, args.fix)
    for name, version in module_versions:
        validator.validate_module(name, version, args.skip_validation)

    if args.check_all_metadata:
        validator.validate_all_metadata()

    return validator.getValidationReturnCode()
if __name__ == "__main__":
    # Under 'bazel run' we want to run within the source folder instead of the execroot.
    workspace_dir = os.getenv("BUILD_WORKSPACE_DIRECTORY")
    if workspace_dir:
        os.chdir(workspace_dir)
    exit_code = main()
    sys.exit(exit_code)