feat(gazelle): Remove integrity field from Gazelle manifest (#1666)

As per the discussion in #1465 and in this PR, this PR does the
following:

- Make the `requirements` parameter to the `gazelle_python_manifest`
macro optional.
- If `requirements` is not provided, no integrity field is added to the
manifest, and `diff_test` is used to see if the manifest is stale
instead of the existing `go_test` that just checks the integrity.
- Migrates `go_binary` to `genrule` to generate the manifest file that
can be used with `diff_test`. (There's still a `go_binary` under the
hood, but the manifest macro itself uses a genrule now rather than a
wrapper `go_binary`.)
- It does not migrate the manifest generator from Go to Python;
hopefully that can be deferred to another PR.

A custom `copy_to_source.sh` script is included, which is used to update
the manifest in the source tree.

Compatibility discussion:
- The update and test target names have not changed; they are still
`//:gazelle_python_manifest.update` and
`//:gazelle_python_manifest.test`.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4bb076d..5923c5e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -164,6 +164,10 @@
   target for each file with `if __name__ == "__main__"` instead of just one
   `py_binary` for the whole module.
 
+* (gazelle) the Gazelle manifest integrity field is now optional. If the
+  `requirements` argument to `gazelle_python_manifest` is unset, no integrity
+  field will be generated.
+
 ### Fixed
 
 * (gazelle) The gazelle plugin helper was not working with Python toolchains 3.11
diff --git a/examples/bzlmod_build_file_generation/.bazelversion b/examples/bzlmod_build_file_generation/.bazelversion
index 09b254e..19b860c 100644
--- a/examples/bzlmod_build_file_generation/.bazelversion
+++ b/examples/bzlmod_build_file_generation/.bazelversion
@@ -1 +1 @@
-6.0.0
+6.4.0
diff --git a/examples/bzlmod_build_file_generation/BUILD.bazel b/examples/bzlmod_build_file_generation/BUILD.bazel
index bca3b36..33d01f4 100644
--- a/examples/bzlmod_build_file_generation/BUILD.bazel
+++ b/examples/bzlmod_build_file_generation/BUILD.bazel
@@ -29,6 +29,8 @@
     exclude_patterns = [
         "^_|(\\._)+",  # This is the default.
         "(\\.tests)+",  # Add a custom one to get rid of the psutil tests.
+        "^colorama",  # Get rid of colorama on Windows.
+        "^lazy_object_proxy\\.cext$",  # Get rid of this on Linux because it isn't included on Windows.
     ],
     wheels = all_whl_requirements,
 )
@@ -47,10 +49,6 @@
     name = "gazelle_python_manifest",
     modules_mapping = ":modules_map",
     pip_repository_name = "pip",
-    requirements = [
-        "//:requirements_lock.txt",
-        "//:requirements_windows.txt",
-    ],
     tags = ["exclusive"],
 )
 
diff --git a/examples/bzlmod_build_file_generation/gazelle_python.yaml b/examples/bzlmod_build_file_generation/gazelle_python.yaml
index 46a1c8b..ef01460 100644
--- a/examples/bzlmod_build_file_generation/gazelle_python.yaml
+++ b/examples/bzlmod_build_file_generation/gazelle_python.yaml
@@ -586,4 +586,3 @@
     yamllint.rules.truthy: yamllint
   pip_repository:
     name: pip
-integrity: cd25503dc6b3d9e1c5f46715ba2d0499ecc8b3d654ebcbf9f4e52f2074290e0a
diff --git a/gazelle/MODULE.bazel b/gazelle/MODULE.bazel
index 8c6ad19..940a263 100644
--- a/gazelle/MODULE.bazel
+++ b/gazelle/MODULE.bazel
@@ -4,6 +4,7 @@
     compatibility_level = 1,
 )
 
+bazel_dep(name = "bazel_skylib", version = "1.5.0")
 bazel_dep(name = "rules_python", version = "0.18.0")
 bazel_dep(name = "rules_go", version = "0.41.0", repo_name = "io_bazel_rules_go")
 bazel_dep(name = "gazelle", version = "0.33.0", repo_name = "bazel_gazelle")
diff --git a/gazelle/README.md b/gazelle/README.md
index 0a5b104..c1221a6 100644
--- a/gazelle/README.md
+++ b/gazelle/README.md
@@ -114,6 +114,10 @@
     pip_repository_name = "pip",
     # This should point to wherever we declare our python dependencies
     # (the same as what we passed to the modules_mapping rule in WORKSPACE)
+    # This argument is optional. If provided, the `.test` target is very
+    # fast because it just has to check an integrity field. If not provided,
+    # the integrity field is not added to the manifest which can help avoid
+    # merge conflicts in large repos.
     requirements = "//:requirements_lock.txt",
 )
 ```
diff --git a/gazelle/manifest/BUILD.bazel b/gazelle/manifest/BUILD.bazel
index fc7fa09..33b5a46 100644
--- a/gazelle/manifest/BUILD.bazel
+++ b/gazelle/manifest/BUILD.bazel
@@ -1,5 +1,10 @@
 load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
 
+exports_files([
+    # This gets wrapped up into a py_binary with args inside of the gazelle_python_manifest macro.
+    "copy_to_source.py",
+])
+
 go_library(
     name = "manifest",
     srcs = ["manifest.go"],
diff --git a/gazelle/manifest/copy_to_source.py b/gazelle/manifest/copy_to_source.py
new file mode 100644
index 0000000..6854192
--- /dev/null
+++ b/gazelle/manifest/copy_to_source.py
@@ -0,0 +1,36 @@
+"""Copy a generated file to the source tree.
+
+Run like:
+    copy_to_source path/to/generated_file path/to/source_file_to_overwrite
+"""
+
+import os
+import shutil
+import stat
+import sys
+from pathlib import Path
+
+
+def copy_to_source(generated_relative_path: Path, target_relative_path: Path) -> None:
+    """Copy the generated file to the target file path.
+
+    Expands the relative paths by looking at Bazel env vars to figure out which absolute paths to use.
+    """
+    # This script normally gets executed from the runfiles dir, so find the absolute path to the generated file based on that.
+    generated_absolute_path = Path.cwd() / generated_relative_path
+
+    # Similarly, the target is relative to the source directory.
+    target_absolute_path = os.getenv("BUILD_WORKSPACE_DIRECTORY") / target_relative_path
+
+    print(f"Copying {generated_absolute_path} to {target_absolute_path}")
+    target_absolute_path.parent.mkdir(parents=True, exist_ok=True)
+    shutil.copy(generated_absolute_path, target_absolute_path)
+
+    target_absolute_path.chmod(0O664)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        sys.exit("Usage: copy_to_source <generated_file> <target_file>")
+
+    copy_to_source(Path(sys.argv[1]), Path(sys.argv[2]))
diff --git a/gazelle/manifest/defs.bzl b/gazelle/manifest/defs.bzl
index ef0f275..ccabfd2 100644
--- a/gazelle/manifest/defs.bzl
+++ b/gazelle/manifest/defs.bzl
@@ -16,12 +16,14 @@
 for updating and testing the Gazelle manifest file.
 """
 
-load("@io_bazel_rules_go//go:def.bzl", "GoSource", "go_binary", "go_test")
+load("@bazel_skylib//rules:diff_test.bzl", "diff_test")
+load("@io_bazel_rules_go//go:def.bzl", "GoSource", "go_test")
+load("@rules_python//python:defs.bzl", "py_binary")
 
 def gazelle_python_manifest(
         name,
-        requirements,
         modules_mapping,
+        requirements = [],
         pip_repository_name = "",
         pip_deps_repository_name = "",
         manifest = ":gazelle_python.yaml",
@@ -30,15 +32,18 @@
 
     Args:
         name: the name used as a base for the targets.
+        modules_mapping: the target for the generated modules_mapping.json file.
         requirements: the target for the requirements.txt file or a list of
             requirements files that will be concatenated before passing on to
-            the manifest generator.
+            the manifest generator. If unset, no integrity field is added to the
+            manifest, meaning testing it is just as expensive as generating it,
+            but modifying it is much less likely to result in a merge conflict.
         pip_repository_name: the name of the pip_install or pip_repository target.
         pip_deps_repository_name: deprecated - the old pip_install target name.
-        modules_mapping: the target for the generated modules_mapping.json file.
-        manifest: the target for the Gazelle manifest file.
-        **kwargs: other bazel attributes passed to the target target generated by
-            this macro.
+        manifest: the Gazelle manifest file.
+            defaults to the same value as manifest.
+        **kwargs: other bazel attributes passed to the generate and test targets
+            generated by this macro.
     """
     if pip_deps_repository_name != "":
         # buildifier: disable=print
@@ -52,12 +57,17 @@
         # This is a temporary check while pip_deps_repository_name exists as deprecated.
         fail("pip_repository_name must be set in //{}:{}".format(native.package_name(), name))
 
+    test_target = "{}.test".format(name)
     update_target = "{}.update".format(name)
     update_target_label = "//{}:{}".format(native.package_name(), update_target)
 
+    manifest_genrule = name + ".genrule"
+    generated_manifest = name + ".generated_manifest"
+    manifest_generator = Label("//manifest/generate:generate")
     manifest_generator_hash = Label("//manifest/generate:generate_lib_sources_hash")
 
-    if type(requirements) == "list":
+    if requirements and type(requirements) == "list":
+        # This runs if requirements is a list or is unset (default value is empty list)
         native.genrule(
             name = name + "_requirements_gen",
             srcs = sorted(requirements),
@@ -68,56 +78,71 @@
         requirements = name + "_requirements_gen"
 
     update_args = [
-        "--manifest-generator-hash",
-        "$(rootpath {})".format(manifest_generator_hash),
-        "--requirements",
-        "$(rootpath {})".format(requirements),
-        "--pip-repository-name",
-        pip_repository_name,
-        "--modules-mapping",
-        "$(rootpath {})".format(modules_mapping),
-        "--output",
-        "$(rootpath {})".format(manifest),
-        "--update-target",
-        update_target_label,
+        "--manifest-generator-hash=$(execpath {})".format(manifest_generator_hash),
+        "--requirements=$(rootpath {})".format(requirements) if requirements else "--requirements=",
+        "--pip-repository-name={}".format(pip_repository_name),
+        "--modules-mapping=$(execpath {})".format(modules_mapping),
+        "--output=$(execpath {})".format(generated_manifest),
+        "--update-target={}".format(update_target_label),
     ]
 
-    go_binary(
-        name = update_target,
-        embed = [Label("//manifest/generate:generate_lib")],
-        data = [
-            manifest,
+    native.genrule(
+        name = manifest_genrule,
+        outs = [generated_manifest],
+        cmd = "$(execpath {}) {}".format(manifest_generator, " ".join(update_args)),
+        tools = [manifest_generator],
+        srcs = [
             modules_mapping,
-            requirements,
             manifest_generator_hash,
-        ],
-        args = update_args,
-        visibility = ["//visibility:private"],
-        tags = ["manual"],
+        ] + ([requirements] if requirements else []),
     )
 
-    attrs = {
-        "env": {
-            "_TEST_MANIFEST": "$(rootpath {})".format(manifest),
-            "_TEST_MANIFEST_GENERATOR_HASH": "$(rootpath {})".format(manifest_generator_hash),
-            "_TEST_REQUIREMENTS": "$(rootpath {})".format(requirements),
-        },
-        "size": "small",
-    }
-    go_test(
-        name = "{}.test".format(name),
-        srcs = [Label("//manifest/test:test.go")],
-        data = [
-            manifest,
-            requirements,
-            manifest_generator_hash,
+    py_binary(
+        name = update_target,
+        srcs = [Label("//manifest:copy_to_source.py")],
+        main = Label("//manifest:copy_to_source.py"),
+        args = [
+            "$(rootpath {})".format(generated_manifest),
+            "$(rootpath {})".format(manifest),
         ],
-        rundir = ".",
-        deps = [Label("//manifest")],
-        # kwargs could contain test-specific attributes like size or timeout
-        **dict(attrs, **kwargs)
+        data = [
+            generated_manifest,
+            manifest,
+        ],
+        **kwargs
     )
 
+    if requirements:
+        attrs = {
+            "env": {
+                "_TEST_MANIFEST": "$(rootpath {})".format(manifest),
+                "_TEST_MANIFEST_GENERATOR_HASH": "$(rootpath {})".format(manifest_generator_hash),
+                "_TEST_REQUIREMENTS": "$(rootpath {})".format(requirements),
+            },
+            "size": "small",
+        }
+        go_test(
+            name = test_target,
+            srcs = [Label("//manifest/test:test.go")],
+            data = [
+                manifest,
+                requirements,
+                manifest_generator_hash,
+            ],
+            rundir = ".",
+            deps = [Label("//manifest")],
+            # kwargs could contain test-specific attributes like size or timeout
+            **dict(attrs, **kwargs)
+        )
+    else:
+        diff_test(
+            name = test_target,
+            file1 = generated_manifest,
+            file2 = manifest,
+            failure_message = "Gazelle manifest is out of date. Run 'bazel run {}' to update it.".format(native.package_relative_label(update_target)),
+            **kwargs
+        )
+
     native.filegroup(
         name = name,
         srcs = [manifest],
diff --git a/gazelle/manifest/generate/generate.go b/gazelle/manifest/generate/generate.go
index bdd0206..19ca08a 100644
--- a/gazelle/manifest/generate/generate.go
+++ b/gazelle/manifest/generate/generate.go
@@ -31,12 +31,6 @@
 	"github.com/bazelbuild/rules_python/gazelle/manifest"
 )
 
-func init() {
-	if os.Getenv("BUILD_WORKSPACE_DIRECTORY") == "" {
-		log.Fatalln("ERROR: this program must run under Bazel")
-	}
-}
-
 func main() {
 	var (
 		manifestGeneratorHashPath string
@@ -79,10 +73,6 @@
 		"The Bazel target to update the YAML manifest file.")
 	flag.Parse()
 
-	if requirementsPath == "" {
-		log.Fatalln("ERROR: --requirements must be set")
-	}
-
 	if modulesMappingPath == "" {
 		log.Fatalln("ERROR: --modules-mapping must be set")
 	}
@@ -102,12 +92,12 @@
 
 	header := generateHeader(updateTarget)
 	repository := manifest.PipRepository{
-		Name:                    pipRepositoryName,
+		Name: pipRepositoryName,
 	}
 
 	manifestFile := manifest.NewFile(&manifest.Manifest{
 		ModulesMapping: modulesMapping,
-		PipRepository: &repository,
+		PipRepository:  &repository,
 	})
 	if err := writeOutput(
 		outputPath,
@@ -155,12 +145,7 @@
 	manifestGeneratorHashPath string,
 	requirementsPath string,
 ) error {
-	stat, err := os.Stat(outputPath)
-	if err != nil {
-		return fmt.Errorf("failed to write output: %w", err)
-	}
-
-	outputFile, err := os.OpenFile(outputPath, os.O_WRONLY|os.O_TRUNC, stat.Mode())
+	outputFile, err := os.OpenFile(outputPath, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
 	if err != nil {
 		return fmt.Errorf("failed to write output: %w", err)
 	}
@@ -170,20 +155,26 @@
 		return fmt.Errorf("failed to write output: %w", err)
 	}
 
-	manifestGeneratorHash, err := os.Open(manifestGeneratorHashPath)
-	if err != nil {
-		return fmt.Errorf("failed to write output: %w", err)
-	}
-	defer manifestGeneratorHash.Close()
+	if requirementsPath != "" {
+		manifestGeneratorHash, err := os.Open(manifestGeneratorHashPath)
+		if err != nil {
+			return fmt.Errorf("failed to write output: %w", err)
+		}
+		defer manifestGeneratorHash.Close()
 
-	requirements, err := os.Open(requirementsPath)
-	if err != nil {
-		return fmt.Errorf("failed to write output: %w", err)
-	}
-	defer requirements.Close()
+		requirements, err := os.Open(requirementsPath)
+		if err != nil {
+			return fmt.Errorf("failed to write output: %w", err)
+		}
+		defer requirements.Close()
 
-	if err := manifestFile.Encode(outputFile, manifestGeneratorHash, requirements); err != nil {
-		return fmt.Errorf("failed to write output: %w", err)
+		if err := manifestFile.EncodeWithIntegrity(outputFile, manifestGeneratorHash, requirements); err != nil {
+			return fmt.Errorf("failed to write output: %w", err)
+		}
+	} else {
+		if err := manifestFile.EncodeWithoutIntegrity(outputFile); err != nil {
+			return fmt.Errorf("failed to write output: %w", err)
+		}
 	}
 
 	return nil
diff --git a/gazelle/manifest/manifest.go b/gazelle/manifest/manifest.go
index fb146f9..26b0dfb 100644
--- a/gazelle/manifest/manifest.go
+++ b/gazelle/manifest/manifest.go
@@ -31,7 +31,7 @@
 	// Integrity is the hash of the requirements.txt file and the Manifest for
 	// ensuring the integrity of the entire gazelle_python.yaml file. This
 	// controls the testing to keep the gazelle_python.yaml file up-to-date.
-	Integrity string `yaml:"integrity"`
+	Integrity string `yaml:"integrity,omitempty"`
 }
 
 // NewFile creates a new File with a given Manifest.
@@ -40,12 +40,21 @@
 }
 
 // Encode encodes the manifest file to the given writer.
-func (f *File) Encode(w io.Writer, manifestGeneratorHashFile, requirements io.Reader) error {
+func (f *File) EncodeWithIntegrity(w io.Writer, manifestGeneratorHashFile, requirements io.Reader) error {
 	integrityBytes, err := f.calculateIntegrity(manifestGeneratorHashFile, requirements)
 	if err != nil {
 		return fmt.Errorf("failed to encode manifest file: %w", err)
 	}
 	f.Integrity = fmt.Sprintf("%x", integrityBytes)
+
+	return f.encode(w)
+}
+
+func (f *File) EncodeWithoutIntegrity(w io.Writer) error {
+	return f.encode(w)
+}
+
+func (f *File) encode(w io.Writer) error {
 	encoder := yaml.NewEncoder(w)
 	defer encoder.Close()
 	if err := encoder.Encode(f); err != nil {
diff --git a/gazelle/manifest/manifest_test.go b/gazelle/manifest/manifest_test.go
index 43c4099..2749733 100644
--- a/gazelle/manifest/manifest_test.go
+++ b/gazelle/manifest/manifest_test.go
@@ -40,7 +40,7 @@
 const pipDepsRepositoryName = "test_repository_name"
 
 func TestFile(t *testing.T) {
-	t.Run("Encode", func(t *testing.T) {
+	t.Run("EncodeWithIntegrity", func(t *testing.T) {
 		f := manifest.NewFile(&manifest.Manifest{
 			ModulesMapping:        modulesMapping,
 			PipDepsRepositoryName: pipDepsRepositoryName,
@@ -53,7 +53,7 @@
 			t.FailNow()
 		}
 		defer requirements.Close()
-		if err := f.Encode(&b, manifestGeneratorHashFile, requirements); err != nil {
+		if err := f.EncodeWithIntegrity(&b, manifestGeneratorHashFile, requirements); err != nil {
 			log.Println(err)
 			t.FailNow()
 		}