tools/bcr_validation.py - third_party/github/bazelbuild/bazel-central-registry - Git at Google

 #!/usr/bin/env python3
 #
 # Copyright 2022 The Bazel Authors. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # pylint: disable=invalid-name
 # pylint: disable=line-too-long
 # pylint: disable=missing-function-docstring
 """A script to perform BCR validations for Bazel modules

 Validations performed are:
   - Verify that the versions in metadata.json match the existing versions
   - Verify that the source archive URL matches the source repositories
   - Verify that the source archive URL is stable
   - Verify that the presubmit.yml file matches the previous version
     - If not, we should require BCR maintainer review.
   - Verify that the checked-in MODULE.bazel file matches the one in the extracted and patched source tree.
 """

 import argparse
 import json
 import subprocess
 from pathlib import Path
 import shutil
 import sys
 import tempfile
 import os
 import yaml

 from enum import Enum
 from difflib import unified_diff
 from urllib.parse import urlparse

 from registry import RegistryClient
 from registry import Version
 from registry import download
 from registry import download_file
 from registry import integrity
 from registry import read
 from verify_stable_archives import UrlStability
 from verify_stable_archives import verify_stable_archive


 class BcrValidationResult(Enum):
     """Result type attached to every message recorded by ``BcrValidator.report``."""

     # The check passed.
     GOOD = 1
     # The check did not fail, but a BCR maintainer has to review the change.
     NEED_BCR_MAINTAINER_REVIEW = 2
     # The check failed.
     FAILED = 3


 # ANSI escape sequences used to colorize terminal output.
 RED = "\x1b[31m"
 GREEN = "\x1b[32m"
 YELLOW = "\x1b[33m"
 # Switches the terminal back to its default rendition.
 RESET = "\x1b[0m"

 # Color in which each validation result type is printed by BcrValidator.report.
 COLOR = {
     BcrValidationResult.GOOD: GREEN,
     BcrValidationResult.NEED_BCR_MAINTAINER_REVIEW: YELLOW,
     BcrValidationResult.FAILED: RED,
 }


 def print_collapsed_group(name):
     """Print ``name`` as a ``---`` header line surrounded by blank lines."""
     header = f"--- {name}"
     print(f"\n\n{header}\n\n")


 def print_expanded_group(name):
     """Print ``name`` as a ``+++`` header line surrounded by blank lines."""
     header = f"+++ {name}"
     print(f"\n\n{header}\n\n")


 def parse_module_versions(registry, check_all, inputs):
     """Turn the command line selection into a list of module versions.

     Args:
         registry: Object providing ``get_all_module_versions()`` and
             ``get_module_versions(name)``.
         check_all: When true, ``inputs`` is ignored and every module version
             known to the registry is returned.
         inputs: Iterable of ``name@version`` or bare ``name`` strings; may be
             None or empty.

     Returns:
         ``(name, version)`` tuples for ``name@version`` inputs, followed or
         interleaved (in input order) with whatever the registry returns for
         bare module names.
     """
     if check_all:
         return registry.get_all_module_versions()

     selected = []
     for spec in inputs or []:
         if "@" not in spec:
             # A bare module name selects all of its versions.
             selected.extend(registry.get_module_versions(spec))
             continue
         name, version = spec.split("@")
         selected.append((name, version))
     return selected


 def apply_patch(work_dir, patch_strip, patch_file):
     """Apply ``patch_file`` inside ``work_dir`` by invoking the ``patch`` tool.

     Args:
         work_dir: Directory the ``patch`` process is started in.
         patch_strip: Integer passed to ``patch`` as its ``-p`` value.
         patch_file: Path of the patch file, passed via ``-i``.

     Raises:
         subprocess.CalledProcessError: If ``patch`` exits with a non-zero status.
     """
     # Requires patch to be installed
     command = ["patch", "-p%d" % patch_strip, "-f", "-l", "-i", patch_file]
     subprocess.run(command, cwd=work_dir, env=os.environ, check=True, shell=False)


 def fix_line_endings(lines):
     """Return a new list where each line has its trailing whitespace replaced by one newline."""
     normalized = []
     for line in lines:
         normalized.append(line.rstrip() + "\n")
     return normalized


 class BcrValidationException(Exception):
     """
     Raised whenever we should stop the validation immediately.

     BcrValidator.verify_module_existence raises it when the module version to
     be validated does not exist in the registry.
     """


 class BcrValidator:
     def __init__(self, registry, should_fix):
         self.validation_results = []
         self.registry = registry
         # Whether the validator should try to fix the detected error.
         self.should_fix = should_fix

     def report(self, type, message):
         """Print ``message`` prefixed by the colorized result ``type`` and record both.

         Args:
             type: A ``BcrValidationResult`` member; it selects the output color.
             message: Human readable description of the result.
         """
         entry = (type, message)
         print(f"{COLOR[type]}{type}{RESET}: {message}\n")
         self.validation_results.append(entry)

     def verify_module_existence(self, module_name, version):
         """Verify the directory exists and the version is recorded in metadata.json.

         Raises:
             BcrValidationException: If the registry does not contain the module
                 version at all; no further check makes sense in that case.
         """
         if not self.registry.contains(module_name, version):
             self.report(BcrValidationResult.FAILED, f"{module_name}@{version} doesn't exist.")
             raise BcrValidationException("The module to be validated doesn't exist!")

         recorded_versions = self.registry.get_metadata(module_name)["versions"]
         if version in recorded_versions:
             self.report(BcrValidationResult.GOOD, "The module exists and is recorded in metadata.json.")
             return
         self.report(
             BcrValidationResult.FAILED, f"Version {version} is not recorded in {module_name}'s metadata.json file."
         )

     def verify_source_archive_url_match_github_repo(self, module_name, version):
         """Verify the source archive URL belongs to one of the module's source repositories.

         Repositories come from the "repository" list in metadata.json. Two entry
         forms are checked: ``github:<org>/<repo>`` and full ``https://...`` URLs.
         Entries of any other form never match. A module without any listed
         repository passes unconditionally.
         """
         source_url = self.registry.get_source(module_name, version)["url"]
         source_repositories = self.registry.get_metadata(module_name).get("repository", [])
         url = urlparse(source_url)

         def matches(source_repository):
             # partition() splits on the first ":" only, so entries containing
             # further colons (e.g. "https://host:8443/repo") or none at all do
             # not abort the validation with a ValueError.
             repo_type, _, repo_path = source_repository.partition(":")
             if repo_type == "github":
                 expected_scheme, expected_netloc, prefix = "https", "github.com", f"/{repo_path}/"
             elif repo_type == "https":
                 repo = urlparse(source_repository)
                 expected_scheme, expected_netloc, prefix = repo.scheme, repo.netloc, f"{repo.path}/"
             else:
                 return False
             # Normalize the path first so ".." segments cannot fake the prefix.
             return (
                 url.scheme == expected_scheme
                 and url.netloc == expected_netloc
                 and os.path.abspath(url.path).startswith(prefix)
             )

         matched = not source_repositories or any(matches(repo) for repo in source_repositories)
         if not matched:
             self.report(
                 BcrValidationResult.FAILED,
                 f"The source URL of {module_name}@{version} ({source_url}) doesn't match any of the module's source repositories {source_repositories}.",
             )
         else:
             self.report(BcrValidationResult.GOOD, "The source URL matches one of the source repositories.")

     def verify_source_archive_url_stability(self, module_name, version):
         """Report FAILED when ``verify_stable_archive`` classifies the source URL as unstable."""
         source_url = self.registry.get_source(module_name, version)["url"]
         if verify_stable_archive(source_url) != UrlStability.UNSTABLE:
             self.report(BcrValidationResult.GOOD, "The source URL doesn't look unstable.")
             return

         advice = (
             f"{module_name}@{version} is using an unstable source url: `{source_url}`.\n"
             "You should use a release archive URL in the format of "
             "`https://github.com/<ORGANIZATION>/<REPO>/releases/download/<version>/<name>.tar.gz` "
             "to ensure the archive checksum stability.\n"
             "See https://blog.bazel.build/2023/02/15/github-archive-checksum.html for more context."
         )
         self.report(BcrValidationResult.FAILED, advice)

     def verify_source_archive_url_integrity(self, module_name, version):
         """Verify the integrity value of the URL is correct.

         Downloads the source archive and recomputes its integrity value with the
         algorithm named before the first "-" of the expected value.
         """
         # Fetch source.json once; both values come from the same document.
         source = self.registry.get_source(module_name, version)
         source_url = source["url"]
         expected_integrity = source["integrity"]
         algorithm, _ = expected_integrity.split("-", 1)
         real_integrity = integrity(download(source_url), algorithm)
         if real_integrity != expected_integrity:
             self.report(
                 BcrValidationResult.FAILED,
                 f"{module_name}@{version}'s source archive `{source_url}` has expected integrity value "
                 f"`{expected_integrity}`, but the real integrity value is `{real_integrity}`!",
             )
         else:
             self.report(BcrValidationResult.GOOD, "The source archive's integrity value matches.")

     def verify_presubmit_yml_change(self, module_name, version):
         """Verify if the presubmit.yml is the same as the previous version.

         The previous version is the entry preceding ``version`` once the versions
         from metadata.json are ordered with ``Version`` as sort key. Any
         difference, or the absence of a previous version, is reported as needing
         BCR maintainer review.
         """
         # sorted() builds a new list, leaving the metadata object handed out by
         # the registry unmodified.
         versions = sorted(self.registry.get_metadata(module_name)["versions"], key=Version)
         if version not in versions:
             # Without this guard list.index() would raise ValueError and abort
             # the whole run instead of recording a failure.
             self.report(
                 BcrValidationResult.FAILED,
                 f"Version {version} is not recorded in {module_name}'s metadata.json file, "
                 "so its presubmit.yml file cannot be compared with a previous version.",
             )
             return

         index = versions.index(version)
         if index == 0:
             self.report(
                 BcrValidationResult.NEED_BCR_MAINTAINER_REVIEW,
                 f"Module version {module_name}@{version} is new, the presubmit.yml file "
                 "should be reviewed by a BCR maintainer.",
             )
             return

         pre_version = versions[index - 1]
         previous_presubmit_yml = self.registry.get_presubmit_yml_path(module_name, pre_version)
         current_presubmit_yml = self.registry.get_presubmit_yml_path(module_name, version)
         with open(previous_presubmit_yml, "r") as previous_file:
             previous_presubmit_content = previous_file.readlines()
         with open(current_presubmit_yml, "r") as current_file:
             current_presubmit_content = current_file.readlines()
         diff = list(
             unified_diff(
                 previous_presubmit_content,
                 current_presubmit_content,
                 fromfile=str(previous_presubmit_yml),
                 tofile=str(current_presubmit_yml),
             )
         )
         if diff:
             self.report(
                 BcrValidationResult.NEED_BCR_MAINTAINER_REVIEW,
                 f"The presubmit.yml file of {module_name}@{version} doesn't match its previous version "
                 f"{module_name}@{pre_version}, the following presubmit.yml file change "
                 "should be reviewed by a BCR maintainer.\n    " + "    ".join(diff),
             )
         else:
             self.report(BcrValidationResult.GOOD, "The presubmit.yml file matches the previous version.")

     def add_module_dot_bazel_patch(self, diff, module_name, version):
         """Adding a patch file for MODULE.bazel according to the diff result.

         Writes ``diff`` to ``module_dot_bazel.patch`` and registers that file,
         together with its integrity value, in the module version's source.json.

         Args:
             diff: Iterable of diff lines to write to the patch file.
             module_name: Name of the module being fixed.
             version: Version of the module being fixed.
         """
         source = self.registry.get_source(module_name, version)
         patch_file = self.registry.get_patch_file_path(module_name, version, "module_dot_bazel.patch")
         patch_file.parent.mkdir(parents=True, exist_ok=True)
         # Close the file before it is read back below, so the integrity value is
         # computed over the complete, flushed content.
         with open(patch_file, "w") as patch_out:
             patch_out.writelines(diff)
         source["patch_strip"] = int(source.get("patch_strip", 0))
         patches = source.get("patches", {})
         patches["module_dot_bazel.patch"] = integrity(read(patch_file))
         source["patches"] = patches
         source_json_content = json.dumps(source, indent=4) + "\n"
         self.registry.get_source_json_path(module_name, version).write_text(source_json_content)

     def verify_module_dot_bazel(self, module_name, version):
         """Verify the checked-in MODULE.bazel matches the one in the patched source archive.

         Downloads and unpacks the source archive into a temporary directory,
         applies the patches and overlay files listed in source.json (checking
         their integrity values), and diffs the resulting MODULE.bazel against the
         one stored in the registry. A missing MODULE.bazel in the sources is
         treated as an empty file. When the files differ and ``should_fix`` is
         set, a patch reproducing the diff is added to the module version.
         """
         source = self.registry.get_source(module_name, version)
         source_url = source["url"]
         # Default to 0, like the diff header below, so that patches without an
         # explicit "patch_strip" entry do not abort the run with a KeyError.
         patch_strip = int(source.get("patch_strip", 0))
         tmp_dir = Path(tempfile.mkdtemp())
         try:
             archive_file = tmp_dir.joinpath(source_url.split("/")[-1].split("?")[0])
             output_dir = tmp_dir.joinpath("source_root")
             download_file(source_url, archive_file)
             shutil.unpack_archive(str(archive_file), output_dir)

             # Apply patch files if there are any, also verify their integrity values
             source_root = output_dir.joinpath(source.get("strip_prefix", ""))
             for patch_name, expected_integrity in source.get("patches", {}).items():
                 patch_file = self.registry.get_patch_file_path(module_name, version, patch_name)
                 actual_integrity = integrity(read(patch_file))
                 if actual_integrity != expected_integrity:
                     self.report(
                         BcrValidationResult.FAILED,
                         f"The patch file `{patch_file}` has expected integrity value `{expected_integrity}`, "
                         f"but the real integrity value is `{actual_integrity}`.",
                     )
                 apply_patch(source_root, patch_strip, str(patch_file.resolve()))

             if "overlay" in source:
                 overlay_dir = self.registry.get_overlay_dir(module_name, version)
                 # Resolve the root as well: the temporary directory may sit behind
                 # a symlink, which would make every overlay path look like an escape.
                 resolved_root = source_root.resolve()
                 for overlay_file, expected_integrity in source["overlay"].items():
                     overlay_src = overlay_dir / overlay_file
                     overlay_dst = source_root / overlay_file
                     try:
                         overlay_dst.resolve().relative_to(resolved_root)
                     except ValueError:
                         self.report(
                             BcrValidationResult.FAILED,
                             f"The overlay file path `{overlay_file}` must point inside the source archive.",
                         )
                         continue
                     try:
                         actual_integrity = integrity(read(overlay_src))
                     except FileNotFoundError:
                         self.report(BcrValidationResult.FAILED, f"The overlay file `{overlay_file}` does not exist")
                         continue
                     if actual_integrity != expected_integrity:
                         self.report(
                             BcrValidationResult.FAILED,
                             f"The overlay file `{overlay_file}` has expected integrity value `{expected_integrity}`, "
                             f"but the real integrity value is `{actual_integrity}`.",
                         )
                         continue
                     overlay_dst.parent.mkdir(parents=True, exist_ok=True)
                     shutil.copy2(overlay_src, overlay_dst)

             source_module_dot_bazel = source_root.joinpath("MODULE.bazel")
             source_module_dot_bazel_content = []
             if source_module_dot_bazel.exists():
                 with open(source_module_dot_bazel, "r") as source_file:
                     source_module_dot_bazel_content = source_file.readlines()
             with open(self.registry.get_module_dot_bazel_path(module_name, version), "r") as bcr_file:
                 bcr_module_dot_bazel_content = bcr_file.readlines()
             source_module_dot_bazel_content = fix_line_endings(source_module_dot_bazel_content)
             bcr_module_dot_bazel_content = fix_line_endings(bcr_module_dot_bazel_content)
             # Prefix the file name with one "a/" per strip level so the generated
             # patch applies with the module's patch_strip value.
             file_name = "a/" * patch_strip + "MODULE.bazel"
             diff = list(
                 unified_diff(
                     source_module_dot_bazel_content, bcr_module_dot_bazel_content, fromfile=file_name, tofile=file_name
                 )
             )

             if diff:
                 self.report(
                     BcrValidationResult.FAILED,
                     "Checked in MODULE.bazel file doesn't match the one in the extracted and patched sources.\n"
                     + f"Please fix the MODULE.bazel file or you can add the following patch to {module_name}@{version}:\n"
                     + "    "
                     + "    ".join(diff),
                 )
                 if self.should_fix:
                     self.add_module_dot_bazel_patch(diff, module_name, version)
             else:
                 self.report(BcrValidationResult.GOOD, "Checked in MODULE.bazel matches the sources.")
         finally:
             # Remove the download even when a step above raised.
             shutil.rmtree(tmp_dir)

     def check_if_bazel_version_is_set(self, tasks):
         """Report FAILED for every task in ``tasks`` that has no "bazel" entry.

         Args:
             tasks: Mapping of task name to task configuration.
         """
         for task_name, task_config in tasks.items():
             # A task declared with an empty body has None as its configuration;
             # treat it as missing the version instead of failing on ``in None``.
             if task_config is None or "bazel" not in task_config:
                 self.report(
                     BcrValidationResult.FAILED,
                     f"Missing bazel version for task '{task_name}' in the presubmit.yml file.",
                 )

     def validate_presubmit_yml(self, module_name, version):
         """Validate the content of the module version's presubmit.yml file.

         Every task, both top-level and under "bcr_test_module", must set a bazel
         version, and at least one task must be present. GOOD is reported only
         when none of these checks recorded a result.
         """
         presubmit_yml = self.registry.get_presubmit_yml_path(module_name, version)
         with open(presubmit_yml, "r") as presubmit_file:
             # An empty document loads as None; fall back to an empty mapping so
             # it is reported as having no tasks rather than crashing.
             presubmit = yaml.safe_load(presubmit_file) or {}
         report_num_old = len(self.validation_results)
         # ``or {}`` also covers keys that are present but left empty.
         tasks = presubmit.get("tasks") or {}
         self.check_if_bazel_version_is_set(tasks)
         test_module = presubmit.get("bcr_test_module") or {}
         test_module_tasks = test_module.get("tasks") or {}
         self.check_if_bazel_version_is_set(test_module_tasks)
         if not tasks and not test_module_tasks:
             self.report(BcrValidationResult.FAILED, "At least one task should be specified in the presubmit.yml file.")
         report_num_new = len(self.validation_results)
         if report_num_new == report_num_old:
             self.report(BcrValidationResult.GOOD, "The presubmit.yml file is valid.")

     def verify_module_name_conflict(self):
         """Verify no module name conflict when ignoring case sensitivity."""
         # Group the module names by their lower-cased spelling.
         names_by_lowercase = {}
         for module_name in self.registry.get_all_modules():
             names_by_lowercase.setdefault(module_name.lower(), []).append(module_name)

         conflicts = [names for names in names_by_lowercase.values() if len(names) > 1]
         for names in conflicts:
             self.report(BcrValidationResult.FAILED, f"Module name conflict found: {', '.join(names)}")

         if not conflicts:
             self.report(BcrValidationResult.GOOD, "No module name conflict found.")

     def validate_module(self, module_name, version, skipped_validations):
         """Run the per-module validations for ``module_name@version`` in order.

         Args:
             module_name: Name of the module to validate.
             version: Version of the module to validate.
             skipped_validations: Container of step names to bypass; the steps
                 that can be bypassed are "source_repo", "url_stability" and
                 "presubmit_yml".
         """
         print_expanded_group(f"Validating {module_name}@{version}")
         self.verify_module_name_conflict()

         # (skip key, check) pairs; a skip key of None marks a mandatory check.
         checks = (
             (None, self.verify_module_existence),
             ("source_repo", self.verify_source_archive_url_match_github_repo),
             ("url_stability", self.verify_source_archive_url_stability),
             (None, self.verify_source_archive_url_integrity),
             ("presubmit_yml", self.verify_presubmit_yml_change),
             (None, self.validate_presubmit_yml),
             (None, self.verify_module_dot_bazel),
         )
         for skip_key, check in checks:
             if skip_key is not None and skip_key in skipped_validations:
                 continue
             check(module_name, version)

     def validate_all_metadata(self):
         """Validate the metadata.json file of every module in the registry.

         For each module the file must parse as JSON, list its versions in
         ``Version`` sort order, and only list versions the registry contains.
         GOOD is reported when no module produced a failure.
         """
         print_expanded_group("Validating all metadata.json files")
         failure_count = 0
         for module_name in self.registry.get_all_modules():
             try:
                 metadata = self.registry.get_metadata(module_name)
             except json.JSONDecodeError as e:
                 self.report(BcrValidationResult.FAILED, f"Failed to load {module_name}'s metadata.json file: " + str(e))
                 failure_count += 1
                 continue

             listed_versions = metadata["versions"]
             sorted_versions = sorted(listed_versions, key=Version)
             if sorted_versions != listed_versions:
                 self.report(
                     BcrValidationResult.FAILED,
                     f"{module_name}'s metadata.json file is not sorted by version.\n "
                     f"Sorted versions: {sorted_versions}.\n "
                     f"Original versions: {listed_versions}",
                 )
                 failure_count += 1

             for version in listed_versions:
                 if self.registry.contains(module_name, version):
                     continue
                 self.report(
                     BcrValidationResult.FAILED,
                     f"{module_name}@{version} doesn't exist, "
                     f"but it's recorded in {module_name}'s metadata.json file.",
                 )
                 failure_count += 1

         if failure_count == 0:
             self.report(BcrValidationResult.GOOD, "All metadata.json files are valid.")

     def getValidationReturnCode(self):
         """Fold all recorded results into the overall return code.

         Returns:
             0 when everything is good, 1 when any validation failed, and 42 when
             nothing failed but some change needs BCR maintainer review before
             triggering follow-up BCR presubmit jobs.
         """
         seen_results = {result for result, _ in self.validation_results}
         if BcrValidationResult.FAILED in seen_results:
             return 1
         if BcrValidationResult.NEED_BCR_MAINTAINER_REVIEW in seen_results:
             # Use a special return code to avoid conflict with other error code
             return 42
         return 0


 def main(argv=None):
     """Parse the command line flags, run the requested validations and return the exit code.

     Args:
         argv: Argument list to parse; defaults to ``sys.argv[1:]``.

     Returns:
         -1 when none of --check, --check_all and --check_all_metadata is given
         (the usage help is printed instead); otherwise the value of
         ``BcrValidator.getValidationReturnCode``.
     """
     cli_args = sys.argv[1:] if argv is None else argv

     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--registry",
         type=str,
         default=".",
         help="Specify the root path of the registry (default: the current working directory).",
     )
     parser.add_argument(
         "--check",
         type=str,
         action="append",
         help=(
             "Specify a Bazel module version you want to perform the BCR check on."
             " (e.g. bazel_skylib@1.3.0). If no version is specified, all versions of that module are checked."
             " This flag can be repeated to accept multiple module versions."
         ),
     )
     parser.add_argument(
         "--check_all",
         action="store_true",
         help="Check all Bazel modules in the registry, ignore other --check flags.",
     )
     parser.add_argument(
         "--check_all_metadata",
         action="store_true",
         help="Check all Bazel module metadata in the registry.",
     )
     parser.add_argument(
         "--fix",
         action="store_true",
         help="Should the script try to fix the detected validation errors.",
     )
     parser.add_argument(
         "--skip_validation",
         type=str,
         default=[],
         action="append",
         help=(
             'Bypass the given step for validating modules. Supported values are: "url_stability", '
             'to bypass the URL stability check; "presubmit_yml", to bypass the presubmit.yml check; '
             '"source_repo", to bypass the source repo verification; '
             "This flag can be repeated to skip multiple validations."
         ),
     )

     options = parser.parse_args(cli_args)

     # Without any selection flag there is nothing to do.
     if not (options.check_all or options.check or options.check_all_metadata):
         parser.print_help()
         return -1

     registry = RegistryClient(options.registry)

     # Parse what module versions we should validate
     module_versions = parse_module_versions(registry, options.check_all, options.check)
     if module_versions:
         print_expanded_group("Module versions to be validated:")
         for name, version in module_versions:
             print(f"{name}@{version}")

     # Validate given module version.
     validator = BcrValidator(registry, options.fix)
     for name, version in module_versions:
         validator.validate_module(name, version, options.skip_validation)

     if options.check_all_metadata:
         validator.validate_all_metadata()

     return validator.getValidationReturnCode()


 if __name__ == "__main__":
     # Under 'bazel run' we want to run within the source folder instead of the execroot.
     workspace_dir = os.getenv("BUILD_WORKSPACE_DIRECTORY")
     if workspace_dir:
         os.chdir(workspace_dir)
     sys.exit(main())
	#!/usr/bin/env python3
	#
	# Copyright 2022 The Bazel Authors. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# pylint: disable=invalid-name
	# pylint: disable=line-too-long
	# pylint: disable=missing-function-docstring
	"""A script to perform BCR validations for Bazel modules

	Validations performed are:
	- Verify that the versions in metadata.json match the existing versions
	- Verify that the source archive URL matches the source repositories
	- Verify that the source archive URL is stable
	- Verify that the presubmit.yml file matches the previous version
	- If not, we should require BCR maintainer review.
	- Verify that the checked-in MODULE.bazel file matches the one in the extracted and patched source tree.
	"""

	import argparse
	import json
	import subprocess
	from pathlib import Path
	import shutil
	import sys
	import tempfile
	import os
	import yaml

	from enum import Enum
	from difflib import unified_diff
	from urllib.parse import urlparse

	from registry import RegistryClient
	from registry import Version
	from registry import download
	from registry import download_file
	from registry import integrity
	from registry import read
	from verify_stable_archives import UrlStability
	from verify_stable_archives import verify_stable_archive


	class BcrValidationResult(Enum):
	    """Result type attached to every message recorded by ``BcrValidator.report``."""

	    # The check passed.
	    GOOD = 1
	    # The check did not fail, but a BCR maintainer has to review the change.
	    NEED_BCR_MAINTAINER_REVIEW = 2
	    # The check failed.
	    FAILED = 3


	# ANSI escape sequences used to colorize terminal output.
	RED = "\x1b[31m"
	GREEN = "\x1b[32m"
	YELLOW = "\x1b[33m"
	# Switches the terminal back to its default rendition.
	RESET = "\x1b[0m"

	# Color in which each validation result type is printed by BcrValidator.report.
	COLOR = {
	    BcrValidationResult.GOOD: GREEN,
	    BcrValidationResult.NEED_BCR_MAINTAINER_REVIEW: YELLOW,
	    BcrValidationResult.FAILED: RED,
	}


	def print_collapsed_group(name):
	    """Print ``name`` as a ``---`` header line surrounded by blank lines."""
	    print(f"\n\n--- {name}\n\n")


	def print_expanded_group(name):
	    """Print ``name`` as a ``+++`` header line surrounded by blank lines."""
	    print(f"\n\n+++ {name}\n\n")


	def parse_module_versions(registry, check_all, inputs):
	    """Parse module versions to be validated from input.

	    Args:
	        registry: Object providing ``get_all_module_versions()`` and
	            ``get_module_versions(name)``.
	        check_all: When true, ``inputs`` is ignored and every module version
	            known to the registry is returned.
	        inputs: Iterable of ``name@version`` or bare ``name`` strings; may be
	            None or empty.

	    Returns:
	        ``(name, version)`` tuples for ``name@version`` inputs, plus whatever
	        the registry returns for bare module names, in input order.
	    """
	    if check_all:
	        return registry.get_all_module_versions()
	    if not inputs:
	        return []
	    result = []
	    for s in inputs:
	        if "@" in s:
	            name, version = s.split("@")
	            result.append((name, version))
	        else:
	            # A bare module name selects all of its versions.
	            result.extend(registry.get_module_versions(s))
	    return result


	def apply_patch(work_dir, patch_strip, patch_file):
	    """Apply ``patch_file`` inside ``work_dir`` by invoking the ``patch`` tool.

	    Args:
	        work_dir: Directory the ``patch`` process is started in.
	        patch_strip: Integer passed to ``patch`` as its ``-p`` value.
	        patch_file: Path of the patch file, passed via ``-i``.

	    Raises:
	        subprocess.CalledProcessError: If ``patch`` exits with a non-zero status.
	    """
	    # Requires patch to be installed
	    subprocess.run(
	        ["patch", "-p%d" % patch_strip, "-f", "-l", "-i", patch_file],
	        shell=False,
	        check=True,
	        env=os.environ,
	        cwd=work_dir,
	    )


	def fix_line_endings(lines):
	    """Return a new list where each line has its trailing whitespace replaced by one newline."""
	    return [line.rstrip() + "\n" for line in lines]


	class BcrValidationException(Exception):
	    """
	    Raised whenever we should stop the validation immediately.
	    """


	class BcrValidator:
	def __init__(self, registry, should_fix):
	    """Create a validator bound to ``registry``.

	    Args:
	        registry: Registry client used to look up modules, metadata and files.
	        should_fix: Whether the validator should try to fix the detected error.
	    """
	    # Receives one ``(result_type, message)`` tuple per ``report`` call.
	    self.validation_results = []
	    self.registry = registry
	    # Whether the validator should try to fix the detected error.
	    self.should_fix = should_fix

	def report(self, type, message):
	    """Print ``message`` prefixed by the colorized result ``type`` and record both.

	    Args:
	        type: A ``BcrValidationResult`` member; it selects the output color.
	        message: Human readable description of the result.
	    """
	    color = COLOR[type]
	    print(f"{color}{type}{RESET}: {message}\n")
	    self.validation_results.append((type, message))

	def verify_module_existence(self, module_name, version):
	    """Verify the directory exists and the version is recorded in metadata.json.

	    Raises:
	        BcrValidationException: If the registry does not contain the module
	            version at all.
	    """
	    if not self.registry.contains(module_name, version):
	        self.report(BcrValidationResult.FAILED, f"{module_name}@{version} doesn't exist.")
	        raise BcrValidationException("The module to be validated doesn't exist!")
	    versions = self.registry.get_metadata(module_name)["versions"]
	    if version not in versions:
	        self.report(
	            BcrValidationResult.FAILED, f"Version {version} is not recorded in {module_name}'s metadata.json file."
	        )
	    else:
	        self.report(BcrValidationResult.GOOD, "The module exists and is recorded in metadata.json.")

	def verify_source_archive_url_match_github_repo(self, module_name, version):
	    """Verify the source archive URL belongs to one of the module's source repositories.

	    Repositories come from the "repository" list in metadata.json. Two entry
	    forms are checked: ``github:<org>/<repo>`` and full ``https://...`` URLs.
	    Entries of any other form never match. A module without any listed
	    repository passes unconditionally.
	    """
	    source_url = self.registry.get_source(module_name, version)["url"]
	    source_repositories = self.registry.get_metadata(module_name).get("repository", [])
	    url = urlparse(source_url)

	    def matches(source_repository):
	        # partition() splits on the first ":" only, so entries containing
	        # further colons (e.g. "https://host:8443/repo") or none at all do
	        # not abort the validation with a ValueError.
	        repo_type, _, repo_path = source_repository.partition(":")
	        if repo_type == "github":
	            expected_scheme, expected_netloc, prefix = "https", "github.com", f"/{repo_path}/"
	        elif repo_type == "https":
	            repo = urlparse(source_repository)
	            expected_scheme, expected_netloc, prefix = repo.scheme, repo.netloc, f"{repo.path}/"
	        else:
	            return False
	        # Normalize the path first so ".." segments cannot fake the prefix.
	        return (
	            url.scheme == expected_scheme
	            and url.netloc == expected_netloc
	            and os.path.abspath(url.path).startswith(prefix)
	        )

	    matched = not source_repositories or any(matches(repo) for repo in source_repositories)
	    if not matched:
	        self.report(
	            BcrValidationResult.FAILED,
	            f"The source URL of {module_name}@{version} ({source_url}) doesn't match any of the module's source repositories {source_repositories}.",
	        )
	    else:
	        self.report(BcrValidationResult.GOOD, "The source URL matches one of the source repositories.")

	def verify_source_archive_url_stability(self, module_name, version):
	    """Verify source archive URL is stable"""
	    source_url = self.registry.get_source(module_name, version)["url"]
	    if verify_stable_archive(source_url) == UrlStability.UNSTABLE:
	        self.report(
	            BcrValidationResult.FAILED,
	            f"{module_name}@{version} is using an unstable source url: `{source_url}`.\n"
	            + "You should use a release archive URL in the format of "
	            + "`https://github.com/<ORGANIZATION>/<REPO>/releases/download/<version>/<name>.tar.gz` "
	            + "to ensure the archive checksum stability.\n"
	            + "See https://blog.bazel.build/2023/02/15/github-archive-checksum.html for more context.",
	        )
	    else:
	        self.report(BcrValidationResult.GOOD, "The source URL doesn't look unstable.")

	def verify_source_archive_url_integrity(self, module_name, version):
	    """Verify the integrity value of the URL is correct.

	    Downloads the source archive and recomputes its integrity value with the
	    algorithm named before the first "-" of the expected value.
	    """
	    # Fetch source.json once; both values come from the same document.
	    source = self.registry.get_source(module_name, version)
	    source_url = source["url"]
	    expected_integrity = source["integrity"]
	    algorithm, _ = expected_integrity.split("-", 1)
	    real_integrity = integrity(download(source_url), algorithm)
	    if real_integrity != expected_integrity:
	        self.report(
	            BcrValidationResult.FAILED,
	            f"{module_name}@{version}'s source archive `{source_url}` has expected integrity value "
	            f"`{expected_integrity}`, but the real integrity value is `{real_integrity}`!",
	        )
	    else:
	        self.report(BcrValidationResult.GOOD, "The source archive's integrity value matches.")

	def verify_presubmit_yml_change(self, module_name, version):
	    """Verify if the presubmit.yml is the same as the previous version.

	    The previous version is the entry preceding ``version`` once the versions
	    from metadata.json are ordered with ``Version`` as sort key. Any
	    difference, or the absence of a previous version, is reported as needing
	    BCR maintainer review.
	    """
	    # sorted() builds a new list, leaving the metadata object handed out by
	    # the registry unmodified.
	    versions = sorted(self.registry.get_metadata(module_name)["versions"], key=Version)
	    if version not in versions:
	        # Without this guard list.index() would raise ValueError and abort
	        # the whole run instead of recording a failure.
	        self.report(
	            BcrValidationResult.FAILED,
	            f"Version {version} is not recorded in {module_name}'s metadata.json file, "
	            "so its presubmit.yml file cannot be compared with a previous version.",
	        )
	        return

	    index = versions.index(version)
	    if index == 0:
	        self.report(
	            BcrValidationResult.NEED_BCR_MAINTAINER_REVIEW,
	            f"Module version {module_name}@{version} is new, the presubmit.yml file "
	            "should be reviewed by a BCR maintainer.",
	        )
	        return

	    pre_version = versions[index - 1]
	    previous_presubmit_yml = self.registry.get_presubmit_yml_path(module_name, pre_version)
	    current_presubmit_yml = self.registry.get_presubmit_yml_path(module_name, version)
	    with open(previous_presubmit_yml, "r") as previous_file:
	        previous_presubmit_content = previous_file.readlines()
	    with open(current_presubmit_yml, "r") as current_file:
	        current_presubmit_content = current_file.readlines()
	    diff = list(
	        unified_diff(
	            previous_presubmit_content,
	            current_presubmit_content,
	            fromfile=str(previous_presubmit_yml),
	            tofile=str(current_presubmit_yml),
	        )
	    )
	    if diff:
	        self.report(
	            BcrValidationResult.NEED_BCR_MAINTAINER_REVIEW,
	            f"The presubmit.yml file of {module_name}@{version} doesn't match its previous version "
	            f"{module_name}@{pre_version}, the following presubmit.yml file change "
	            "should be reviewed by a BCR maintainer.\n " + " ".join(diff),
	        )
	    else:
	        self.report(BcrValidationResult.GOOD, "The presubmit.yml file matches the previous version.")

	def add_module_dot_bazel_patch(self, diff, module_name, version):
	    """Adding a patch file for MODULE.bazel according to the diff result.

	    Writes ``diff`` to ``module_dot_bazel.patch`` and registers that file,
	    together with its integrity value, in the module version's source.json.
	    """
	    source = self.registry.get_source(module_name, version)
	    patch_file = self.registry.get_patch_file_path(module_name, version, "module_dot_bazel.patch")
	    patch_file.parent.mkdir(parents=True, exist_ok=True)
	    # Close the file before it is read back below, so the integrity value is
	    # computed over the complete, flushed content.
	    with open(patch_file, "w") as patch_out:
	        patch_out.writelines(diff)
	    source["patch_strip"] = int(source.get("patch_strip", 0))
	    patches = source.get("patches", {})
	    patches["module_dot_bazel.patch"] = integrity(read(patch_file))
	    source["patches"] = patches
	    source_json_content = json.dumps(source, indent=4) + "\n"
	    self.registry.get_source_json_path(module_name, version).write_text(source_json_content)

	def verify_module_dot_bazel(self, module_name, version):
	    """Check that the checked-in MODULE.bazel matches the patched source archive.

	    Downloads and unpacks the source archive into a temporary directory, applies the
	    patches and overlay files listed in source.json (comparing each file's integrity
	    value with the recorded one), and diffs the resulting MODULE.bazel against the
	    registry's copy. A mismatch is reported as FAILED; when `self.should_fix` is set
	    the diff is additionally stored as a patch via `add_module_dot_bazel_patch`.

	    Args:
	        module_name: Name of the module to verify.
	        version: Version of the module to verify.
	    """
	    source = self.registry.get_source(module_name, version)
	    source_url = source["url"]
	    tmp_dir = Path(tempfile.mkdtemp())
	    # Remove the scratch directory even when downloading, unpacking or patching raises.
	    try:
	        archive_file = tmp_dir.joinpath(source_url.split("/")[-1].split("?")[0])
	        output_dir = tmp_dir.joinpath("source_root")
	        download_file(source_url, archive_file)
	        shutil.unpack_archive(str(archive_file), output_dir)

	        # Apply patch files if there are any, also verify their integrity values
	        source_root = output_dir.joinpath(source.get("strip_prefix", ""))
	        if "patches" in source:
	            # Default to 0 when patch_strip is absent, consistent with the diff
	            # header computed further down.
	            patch_strip = source.get("patch_strip", 0)
	            for patch_name, expected_integrity in source["patches"].items():
	                patch_file = self.registry.get_patch_file_path(module_name, version, patch_name)
	                actual_integrity = integrity(read(patch_file))
	                if actual_integrity != expected_integrity:
	                    self.report(
	                        BcrValidationResult.FAILED,
	                        f"The patch file `{patch_file}` has expected integrity value `{expected_integrity}`, "
	                        f"but the real integrity value is `{actual_integrity}`.",
	                    )
	                apply_patch(source_root, patch_strip, str(patch_file.resolve()))
	        if "overlay" in source:
	            overlay_dir = self.registry.get_overlay_dir(module_name, version)
	            # Resolve the root once so both sides of the containment check are
	            # canonical; otherwise a symlinked temp directory makes every overlay
	            # path look as if it escaped the source tree.
	            resolved_source_root = source_root.resolve()
	            for overlay_file, expected_integrity in source["overlay"].items():
	                overlay_src = overlay_dir / overlay_file
	                overlay_dst = source_root / overlay_file
	                try:
	                    overlay_dst.resolve().relative_to(resolved_source_root)
	                except ValueError:
	                    self.report(
	                        BcrValidationResult.FAILED,
	                        f"The overlay file path `{overlay_file}` must point inside the source archive.",
	                    )
	                    continue
	                try:
	                    actual_integrity = integrity(read(overlay_src))
	                except FileNotFoundError:
	                    self.report(BcrValidationResult.FAILED, f"The overlay file `{overlay_file}` does not exist")
	                    continue
	                if actual_integrity != expected_integrity:
	                    self.report(
	                        BcrValidationResult.FAILED,
	                        f"The overlay file `{overlay_file}` has expected integrity value `{expected_integrity}`, "
	                        f"but the real integrity value is `{actual_integrity}`.",
	                    )
	                    continue
	                overlay_dst.parent.mkdir(parents=True, exist_ok=True)
	                shutil.copy2(overlay_src, overlay_dst)

	        # A source tree without MODULE.bazel is diffed as an empty file.
	        source_module_dot_bazel = source_root.joinpath("MODULE.bazel")
	        if source_module_dot_bazel.exists():
	            with open(source_module_dot_bazel, "r") as f:
	                source_module_dot_bazel_content = f.readlines()
	        else:
	            source_module_dot_bazel_content = []
	        with open(self.registry.get_module_dot_bazel_path(module_name, version), "r") as f:
	            bcr_module_dot_bazel_content = f.readlines()
	        source_module_dot_bazel_content = fix_line_endings(source_module_dot_bazel_content)
	        bcr_module_dot_bazel_content = fix_line_endings(bcr_module_dot_bazel_content)
	        # Prefix the file name with one "a/" per strip level so the generated patch
	        # applies with the module's patch_strip value.
	        file_name = "a/" * int(source.get("patch_strip", 0)) + "MODULE.bazel"
	        diff = list(
	            unified_diff(
	                source_module_dot_bazel_content, bcr_module_dot_bazel_content, fromfile=file_name, tofile=file_name
	            )
	        )

	        if diff:
	            self.report(
	                BcrValidationResult.FAILED,
	                "Checked in MODULE.bazel file doesn't match the one in the extracted and patched sources.\n"
	                + f"Please fix the MODULE.bazel file or you can add the following patch to {module_name}@{version}:\n"
	                + " "
	                + " ".join(diff),
	            )
	            if self.should_fix:
	                self.add_module_dot_bazel_patch(diff, module_name, version)
	        else:
	            self.report(BcrValidationResult.GOOD, "Checked in MODULE.bazel matches the sources.")
	    finally:
	        shutil.rmtree(tmp_dir)

	def check_if_bazel_version_is_set(self, tasks):
	    """Report a failure for every presubmit task that has no "bazel" entry.

	    Args:
	        tasks: Mapping of task name to that task's configuration.
	    """
	    for name, config in tasks.items():
	        if "bazel" in config:
	            continue
	        self.report(
	            BcrValidationResult.FAILED,
	            "Missing bazel version for task '%s' in the presubmit.yml file." % name,
	        )

	def validate_presubmit_yml(self, module_name, version):
	    """Validate the task configuration in the module version's presubmit.yml.

	    Reports FAILED for every task (top-level or under "bcr_test_module") that has no
	    "bazel" key, and when no task is defined at all. Reports GOOD when the number of
	    recorded validation results did not change during this check.

	    Args:
	        module_name: Name of the module whose presubmit.yml is validated.
	        version: Version of the module whose presubmit.yml is validated.
	    """
	    presubmit_yml = self.registry.get_presubmit_yml_path(module_name, version)
	    with open(presubmit_yml, "r") as f:
	        # safe_load returns None for an empty document; treat that like an empty
	        # mapping so it is reported as "no tasks" instead of crashing.
	        presubmit = yaml.safe_load(f) or {}
	    report_num_old = len(self.validation_results)
	    # A key that is present but left empty in YAML loads as None, hence `or {}`.
	    tasks = presubmit.get("tasks") or {}
	    self.check_if_bazel_version_is_set(tasks)
	    test_module = presubmit.get("bcr_test_module") or {}
	    test_module_tasks = test_module.get("tasks") or {}
	    self.check_if_bazel_version_is_set(test_module_tasks)
	    if not tasks and not test_module_tasks:
	        self.report(BcrValidationResult.FAILED, "At least one task should be specified in the presubmit.yml file.")
	    report_num_new = len(self.validation_results)
	    if report_num_new == report_num_old:
	        self.report(BcrValidationResult.GOOD, "The presubmit.yml file is valid.")

	def verify_module_name_conflict(self):
	    """Ensure no two module names in the registry differ only by letter case.

	    Reports FAILED once per group of names that collide after lowercasing, or GOOD
	    when there is no such group.
	    """
	    by_lowercase = {}
	    for module in self.registry.get_all_modules():
	        key = module.lower()
	        if key not in by_lowercase:
	            by_lowercase[key] = []
	        by_lowercase[key].append(module)

	    clashes = [group for group in by_lowercase.values() if len(group) > 1]
	    for group in clashes:
	        self.report(BcrValidationResult.FAILED, f"Module name conflict found: {', '.join(group)}")

	    if not clashes:
	        self.report(BcrValidationResult.GOOD, "No module name conflict found.")

	def validate_module(self, module_name, version, skipped_validations):
	    """Run the per-module validations for `module_name`@`version`.

	    Args:
	        module_name: Name of the module to validate.
	        version: Version of the module to validate.
	        skipped_validations: Container of step names to bypass; the names checked
	            here are "source_repo", "url_stability" and "presubmit_yml".
	    """

	    def is_enabled(step):
	        return step not in skipped_validations

	    print_expanded_group(f"Validating {module_name}@{version}")
	    self.verify_module_name_conflict()
	    self.verify_module_existence(module_name, version)
	    if is_enabled("source_repo"):
	        self.verify_source_archive_url_match_github_repo(module_name, version)
	    if is_enabled("url_stability"):
	        self.verify_source_archive_url_stability(module_name, version)
	    self.verify_source_archive_url_integrity(module_name, version)
	    if is_enabled("presubmit_yml"):
	        self.verify_presubmit_yml_change(module_name, version)
	    # These two checks cannot be skipped.
	    self.validate_presubmit_yml(module_name, version)
	    self.verify_module_dot_bazel(module_name, version)

	def validate_all_metadata(self):
	    """Validate the metadata.json file of every module in the registry.

	    For each module, reports FAILED when the file cannot be decoded as JSON, when its
	    "versions" list is not sorted by `Version`, or when a listed version is not
	    contained in the registry. Reports GOOD once if none of these occurred.
	    """
	    print_expanded_group("Validating all metadata.json files")
	    failure_seen = False
	    for module_name in self.registry.get_all_modules():
	        try:
	            metadata = self.registry.get_metadata(module_name)
	        except json.JSONDecodeError as e:
	            failure_seen = True
	            self.report(BcrValidationResult.FAILED, f"Failed to load {module_name}'s metadata.json file: " + str(e))
	            continue

	        recorded = metadata["versions"]
	        ordered = sorted(recorded, key=Version)
	        if ordered != recorded:
	            failure_seen = True
	            self.report(
	                BcrValidationResult.FAILED,
	                f"{module_name}'s metadata.json file is not sorted by version.\n "
	                f"Sorted versions: {ordered}.\n "
	                f"Original versions: {recorded}",
	            )

	        for version in recorded:
	            if self.registry.contains(module_name, version):
	                continue
	            failure_seen = True
	            self.report(
	                BcrValidationResult.FAILED,
	                f"{module_name}@{version} doesn't exist, "
	                f"but it's recorded in {module_name}'s metadata.json file.",
	            )

	    if not failure_seen:
	        self.report(BcrValidationResult.GOOD, "All metadata.json files are valid.")

	def getValidationReturnCode(self):
	    """Collapse the recorded validation results into a single return code.

	    Returns:
	        1 if any result is FAILED; otherwise 42 if any result is
	        NEED_BCR_MAINTAINER_REVIEW (validation passes, but some changes need BCR
	        maintainer review before triggering follow-up BCR presubmit jobs);
	        otherwise 0.
	    """
	    outcomes = {code for code, _ in self.validation_results}
	    if BcrValidationResult.FAILED in outcomes:
	        return 1
	    # 42 is a special value chosen to avoid conflict with other error codes.
	    return 42 if BcrValidationResult.NEED_BCR_MAINTAINER_REVIEW in outcomes else 0


	def main(argv=None):
	    """Parse the command line and run the requested BCR validations.

	    Args:
	        argv: Argument list to parse; `sys.argv[1:]` is used when None.

	    Returns:
	        -1 (after printing the help text) when none of --check, --check_all or
	        --check_all_metadata was given; otherwise the validator's return code.
	    """
	    cli_args = sys.argv[1:] if argv is None else argv

	    arg_parser = argparse.ArgumentParser()
	    arg_parser.add_argument(
	        "--registry",
	        type=str,
	        default=".",
	        help="Specify the root path of the registry (default: the current working directory).",
	    )
	    arg_parser.add_argument(
	        "--check",
	        type=str,
	        action="append",
	        help="Specify a Bazel module version you want to perform the BCR check on."
	        + " (e.g. bazel_skylib@1.3.0). If no version is specified, all versions of that module are checked."
	        + " This flag can be repeated to accept multiple module versions.",
	    )
	    arg_parser.add_argument(
	        "--check_all",
	        action="store_true",
	        help="Check all Bazel modules in the registry, ignore other --check flags.",
	    )
	    arg_parser.add_argument(
	        "--check_all_metadata",
	        action="store_true",
	        help="Check all Bazel module metadata in the registry.",
	    )
	    arg_parser.add_argument(
	        "--fix",
	        action="store_true",
	        help="Should the script try to fix the detected validation errors.",
	    )
	    arg_parser.add_argument(
	        "--skip_validation",
	        type=str,
	        default=[],
	        action="append",
	        help='Bypass the given step for validating modules. Supported values are: "url_stability", '
	        + 'to bypass the URL stability check; "presubmit_yml", to bypass the presubmit.yml check; '
	        + '"source_repo", to bypass the source repo verification; '
	        + "This flag can be repeated to skip multiple validations.",
	    )

	    options = arg_parser.parse_args(cli_args)

	    # Without any of the check flags there is nothing to do: show usage and bail out.
	    if not (options.check_all or options.check or options.check_all_metadata):
	        arg_parser.print_help()
	        return -1

	    registry = RegistryClient(options.registry)

	    # Work out which module versions were requested and list them up front.
	    targets = parse_module_versions(registry, options.check_all, options.check)
	    if targets:
	        print_expanded_group("Module versions to be validated:")
	        for name, version in targets:
	            print(f"{name}@{version}")

	    # Validate each requested module version.
	    validator = BcrValidator(registry, options.fix)
	    for name, version in targets:
	        validator.validate_module(name, version, options.skip_validation)

	    if options.check_all_metadata:
	        validator.validate_all_metadata()

	    return validator.getValidationReturnCode()


	if __name__ == "__main__":
	    # When launched through 'bazel run', switch from the execroot to the source
	    # folder that Bazel exposes in BUILD_WORKSPACE_DIRECTORY.
	    if os.environ.get("BUILD_WORKSPACE_DIRECTORY"):
	        os.chdir(os.environ["BUILD_WORKSPACE_DIRECTORY"])
	    sys.exit(main())