feat: gazelle manifest exclude_patterns (#917)
* feat: exclude_patterns for gazelle manifest
Signed-off-by: Thulio Ferraz Assis <3149049+f0rmiga@users.noreply.github.com>
* feat: force gazelle manifest update on logic change
Signed-off-by: Thulio Ferraz Assis <3149049+f0rmiga@users.noreply.github.com>
Signed-off-by: Thulio Ferraz Assis <3149049+f0rmiga@users.noreply.github.com>
diff --git a/examples/build_file_generation/BUILD b/examples/build_file_generation/BUILD
index ef9e967..9204a0e 100644
--- a/examples/build_file_generation/BUILD
+++ b/examples/build_file_generation/BUILD
@@ -4,11 +4,23 @@
load("@rules_python//gazelle/manifest:defs.bzl", "gazelle_python_manifest")
load("@rules_python//gazelle/modules_mapping:def.bzl", "modules_mapping")
load("@rules_python//python:defs.bzl", "py_binary", "py_library")
+load("@rules_python//python:pip.bzl", "compile_pip_requirements")
+
+compile_pip_requirements(
+ name = "requirements",
+ extra_args = ["--allow-unsafe"],
+ requirements_in = "requirements.txt",
+ requirements_txt = "requirements_lock.txt",
+)
# This rule fetches the metadata for python packages we depend on. That data is
# required for the gazelle_python_manifest rule to update our manifest file.
modules_mapping(
name = "modules_map",
+ exclude_patterns = [
+ "^_|(\\._)+", # This is the default.
+ "(\\.tests)+", # Add a custom one to get rid of the psutil tests.
+ ],
wheels = all_whl_requirements,
)
diff --git a/examples/build_file_generation/gazelle_python.yaml b/examples/build_file_generation/gazelle_python.yaml
index 8e68c1d..f25f59e 100644
--- a/examples/build_file_generation/gazelle_python.yaml
+++ b/examples/build_file_generation/gazelle_python.yaml
@@ -6,18 +6,14 @@
manifest:
modules_mapping:
certifi: certifi
- certifi.__init__: certifi
- certifi.__main__: certifi
certifi.core: certifi
chardet: chardet
- chardet.__init__: chardet
chardet.big5freq: chardet
chardet.big5prober: chardet
chardet.chardistribution: chardet
chardet.charsetgroupprober: chardet
chardet.charsetprober: chardet
chardet.cli: chardet
- chardet.cli.__init__: chardet
chardet.cli.chardetect: chardet
chardet.codingstatemachine: chardet
chardet.compat: chardet
@@ -53,7 +49,6 @@
chardet.utf8prober: chardet
chardet.version: chardet
idna: idna
- idna.__init__: idna
idna.codec: idna
idna.compat: idna
idna.core: idna
@@ -61,10 +56,8 @@
idna.intranges: idna
idna.package_data: idna
idna.uts46data: idna
+ psutil: psutil
requests: requests
- requests.__init__: requests
- requests.__version__: requests
- requests._internal_utils: requests
requests.adapters: requests
requests.api: requests
requests.auth: requests
@@ -81,18 +74,9 @@
requests.structures: requests
requests.utils: requests
urllib3: urllib3
- urllib3.__init__: urllib3
- urllib3._collections: urllib3
- urllib3._version: urllib3
urllib3.connection: urllib3
urllib3.connectionpool: urllib3
urllib3.contrib: urllib3
- urllib3.contrib.__init__: urllib3
- urllib3.contrib._appengine_environ: urllib3
- urllib3.contrib._securetransport: urllib3
- urllib3.contrib._securetransport.__init__: urllib3
- urllib3.contrib._securetransport.bindings: urllib3
- urllib3.contrib._securetransport.low_level: urllib3
urllib3.contrib.appengine: urllib3
urllib3.contrib.ntlmpool: urllib3
urllib3.contrib.pyopenssl: urllib3
@@ -102,19 +86,14 @@
urllib3.fields: urllib3
urllib3.filepost: urllib3
urllib3.packages: urllib3
- urllib3.packages.__init__: urllib3
urllib3.packages.backports: urllib3
- urllib3.packages.backports.__init__: urllib3
urllib3.packages.backports.makefile: urllib3
urllib3.packages.six: urllib3
urllib3.packages.ssl_match_hostname: urllib3
- urllib3.packages.ssl_match_hostname.__init__: urllib3
- urllib3.packages.ssl_match_hostname._implementation: urllib3
urllib3.poolmanager: urllib3
urllib3.request: urllib3
urllib3.response: urllib3
urllib3.util: urllib3
- urllib3.util.__init__: urllib3
urllib3.util.connection: urllib3
urllib3.util.proxy: urllib3
urllib3.util.queue: urllib3
@@ -129,4 +108,4 @@
pip_repository:
name: pip
incremental: true
-integrity: 4b3eed2cb51741419e11bd12a4533f285d059fda8029deaf6fedfe0fcda1b782
+integrity: 91adaddb7e2d3eb7234e78979ff40b666101ab4df91c62659b954cc9376c2f86
diff --git a/examples/build_file_generation/requirements.txt b/examples/build_file_generation/requirements.txt
index 9d84d35..2851c1e 100644
--- a/examples/build_file_generation/requirements.txt
+++ b/examples/build_file_generation/requirements.txt
@@ -1 +1,2 @@
requests==2.25.1
+psutil==5.9.4
diff --git a/examples/build_file_generation/requirements_lock.txt b/examples/build_file_generation/requirements_lock.txt
index b66c41f..07ff2ec 100644
--- a/examples/build_file_generation/requirements_lock.txt
+++ b/examples/build_file_generation/requirements_lock.txt
@@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with python 3.9
# To update, run:
#
-# pip-compile --generate-hashes --output-file=requirements_lock.txt requirements.txt
+# bazel run //:requirements.update
#
certifi==2020.12.5 \
--hash=sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c \
@@ -16,10 +16,26 @@
--hash=sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6 \
--hash=sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0
# via requests
+psutil==5.9.4 \
+ --hash=sha256:149555f59a69b33f056ba1c4eb22bb7bf24332ce631c44a319cec09f876aaeff \
+ --hash=sha256:16653106f3b59386ffe10e0bad3bb6299e169d5327d3f187614b1cb8f24cf2e1 \
+ --hash=sha256:3d7f9739eb435d4b1338944abe23f49584bde5395f27487d2ee25ad9a8774a62 \
+ --hash=sha256:3ff89f9b835100a825b14c2808a106b6fdcc4b15483141482a12c725e7f78549 \
+ --hash=sha256:54c0d3d8e0078b7666984e11b12b88af2db11d11249a8ac8920dd5ef68a66e08 \
+ --hash=sha256:54d5b184728298f2ca8567bf83c422b706200bcbbfafdc06718264f9393cfeb7 \
+ --hash=sha256:6001c809253a29599bc0dfd5179d9f8a5779f9dffea1da0f13c53ee568115e1e \
+ --hash=sha256:68908971daf802203f3d37e78d3f8831b6d1014864d7a85937941bb35f09aefe \
+ --hash=sha256:6b92c532979bafc2df23ddc785ed116fced1f492ad90a6830cf24f4d1ea27d24 \
+ --hash=sha256:852dd5d9f8a47169fe62fd4a971aa07859476c2ba22c2254d4a1baa4e10b95ad \
+ --hash=sha256:9120cd39dca5c5e1c54b59a41d205023d436799b1c8c4d3ff71af18535728e94 \
+ --hash=sha256:c1ca331af862803a42677c120aff8a814a804e09832f166f226bfd22b56feee8 \
+ --hash=sha256:efeae04f9516907be44904cc7ce08defb6b665128992a56957abc9b61dca94b7 \
+ --hash=sha256:fd8522436a6ada7b4aad6638662966de0d61d241cb821239b2ae7013d41a43d4
+ # via -r ./requirements.txt
requests==2.25.1 \
--hash=sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804 \
--hash=sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e
- # via -r requirements.txt
+ # via -r ./requirements.txt
urllib3==1.26.5 \
--hash=sha256:753a0374df26658f99d826cfe40394a686d05985786d946fbe4165b5148f5a7c \
--hash=sha256:a7acd0977125325f516bda9735fa7142b909a8d01e8b2e4c8108d0984e6e0098
diff --git a/gazelle/manifest/defs.bzl b/gazelle/manifest/defs.bzl
index 8439319..a5bbe56 100644
--- a/gazelle/manifest/defs.bzl
+++ b/gazelle/manifest/defs.bzl
@@ -2,7 +2,7 @@
for updating and testing the Gazelle manifest file.
"""
-load("@io_bazel_rules_go//go:def.bzl", "go_binary")
+load("@io_bazel_rules_go//go:def.bzl", "GoSource", "go_binary")
def gazelle_python_manifest(
name,
@@ -38,7 +38,11 @@
update_target = "{}.update".format(name)
update_target_label = "//{}:{}".format(native.package_name(), update_target)
+ manifest_generator_hash = Label("//gazelle/manifest/generate:generate_lib_sources_hash")
+
update_args = [
+ "--manifest-generator-hash",
+ "$(rootpath {})".format(manifest_generator_hash),
"--requirements",
"$(rootpath {})".format(requirements),
"--pip-repository-name",
@@ -55,11 +59,12 @@
go_binary(
name = update_target,
- embed = ["@rules_python//gazelle/manifest/generate:generate_lib"],
+ embed = [Label("//gazelle/manifest/generate:generate_lib")],
data = [
manifest,
modules_mapping,
requirements,
+ manifest_generator_hash,
],
args = update_args,
visibility = ["//visibility:private"],
@@ -70,21 +75,23 @@
go_binary(
name = test_binary,
- embed = ["@rules_python//gazelle/manifest/test:test_lib"],
+ embed = [Label("//gazelle/manifest/test:test_lib")],
visibility = ["//visibility:private"],
)
native.sh_test(
name = "{}.test".format(name),
- srcs = ["@rules_python//gazelle/manifest/test:run.sh"],
+ srcs = [Label("//gazelle/manifest/test:run.sh")],
data = [
":{}".format(test_binary),
manifest,
requirements,
+ manifest_generator_hash,
],
env = {
"_TEST_BINARY": "$(rootpath :{})".format(test_binary),
"_TEST_MANIFEST": "$(rootpath {})".format(manifest),
+ "_TEST_MANIFEST_GENERATOR_HASH": "$(rootpath {})".format(manifest_generator_hash),
"_TEST_REQUIREMENTS": "$(rootpath {})".format(requirements),
},
visibility = ["//visibility:private"],
@@ -97,3 +104,56 @@
tags = ["manual"],
visibility = ["//visibility:public"],
)
+
+# buildifier: disable=provider-params
+AllSourcesInfo = provider(fields = {"all_srcs": "All sources collected from the target and dependencies."})
+
+_rules_python_workspace = Label("//:WORKSPACE")
+
+def _get_all_sources_impl(target, ctx):
+ is_rules_python = target.label.workspace_name == _rules_python_workspace.workspace_name
+ if not is_rules_python:
+ # Avoid adding third-party dependency files to the checksum of the srcs.
+ return AllSourcesInfo(all_srcs = depset())
+ srcs = depset(
+ target[GoSource].orig_srcs,
+ transitive = [dep[AllSourcesInfo].all_srcs for dep in ctx.rule.attr.deps],
+ )
+ return [AllSourcesInfo(all_srcs = srcs)]
+
+_get_all_sources = aspect(
+ implementation = _get_all_sources_impl,
+ attr_aspects = ["deps"],
+)
+
+def _sources_hash_impl(ctx):
+ all_srcs = ctx.attr.go_library[AllSourcesInfo].all_srcs
+ hash_file = ctx.actions.declare_file(ctx.attr.name + ".hash")
+ args = ctx.actions.args()
+ args.add(hash_file)
+ args.add_all(all_srcs)
+ ctx.actions.run(
+ outputs = [hash_file],
+ inputs = all_srcs,
+ arguments = [args],
+ executable = ctx.executable._hasher,
+ )
+ return [DefaultInfo(
+ files = depset([hash_file]),
+ runfiles = ctx.runfiles([hash_file]),
+ )]
+
+sources_hash = rule(
+ _sources_hash_impl,
+ attrs = {
+ "go_library": attr.label(
+ aspects = [_get_all_sources],
+ providers = [GoSource],
+ ),
+ "_hasher": attr.label(
+ cfg = "exec",
+ default = Label("//gazelle/manifest/hasher"),
+ executable = True,
+ ),
+ },
+)
diff --git a/gazelle/manifest/generate/BUILD.bazel b/gazelle/manifest/generate/BUILD.bazel
index a8b9cd5..7a5d27f 100644
--- a/gazelle/manifest/generate/BUILD.bazel
+++ b/gazelle/manifest/generate/BUILD.bazel
@@ -1,4 +1,5 @@
load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+load("//gazelle/manifest:defs.bzl", "sources_hash")
go_library(
name = "generate_lib",
@@ -8,6 +9,12 @@
deps = ["//gazelle/manifest"],
)
+sources_hash(
+ name = "generate_lib_sources_hash",
+ go_library = ":generate_lib",
+ visibility = ["//visibility:public"],
+)
+
go_binary(
name = "generate",
embed = [":generate_lib"],
diff --git a/gazelle/manifest/generate/generate.go b/gazelle/manifest/generate/generate.go
index 04d7441..54e8813 100644
--- a/gazelle/manifest/generate/generate.go
+++ b/gazelle/manifest/generate/generate.go
@@ -24,6 +24,7 @@
}
func main() {
+ var manifestGeneratorHashPath string
var requirementsPath string
var pipRepositoryName string
var pipRepositoryIncremental bool
@@ -31,6 +32,12 @@
var outputPath string
var updateTarget string
flag.StringVar(
+ &manifestGeneratorHashPath,
+ "manifest-generator-hash",
+ "",
+ "The file containing the hash for the source code of the manifest generator."+
+ "This is important to force manifest updates when the generator logic changes.")
+ flag.StringVar(
&requirementsPath,
"requirements",
"",
@@ -92,7 +99,13 @@
Incremental: pipRepositoryIncremental,
},
})
- if err := writeOutput(outputPath, header, manifestFile, requirementsPath); err != nil {
+ if err := writeOutput(
+ outputPath,
+ header,
+ manifestFile,
+ manifestGeneratorHashPath,
+ requirementsPath,
+ ); err != nil {
log.Fatalf("ERROR: %v\n", err)
}
}
@@ -129,6 +142,7 @@
outputPath string,
header string,
manifestFile *manifest.File,
+ manifestGeneratorHashPath string,
requirementsPath string,
) error {
stat, err := os.Stat(outputPath)
@@ -146,7 +160,19 @@
return fmt.Errorf("failed to write output: %w", err)
}
- if err := manifestFile.Encode(outputFile, requirementsPath); err != nil {
+ manifestGeneratorHash, err := os.Open(manifestGeneratorHashPath)
+ if err != nil {
+ return fmt.Errorf("failed to write output: %w", err)
+ }
+ defer manifestGeneratorHash.Close()
+
+ requirements, err := os.Open(requirementsPath)
+ if err != nil {
+ return fmt.Errorf("failed to write output: %w", err)
+ }
+ defer requirements.Close()
+
+ if err := manifestFile.Encode(outputFile, manifestGeneratorHash, requirements); err != nil {
return fmt.Errorf("failed to write output: %w", err)
}
diff --git a/gazelle/manifest/hasher/BUILD.bazel b/gazelle/manifest/hasher/BUILD.bazel
new file mode 100644
index 0000000..5e67b2f
--- /dev/null
+++ b/gazelle/manifest/hasher/BUILD.bazel
@@ -0,0 +1,14 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+go_library(
+ name = "hasher_lib",
+ srcs = ["main.go"],
+ importpath = "github.com/bazelbuild/rules_python/gazelle/manifest/hasher",
+ visibility = ["//visibility:private"],
+)
+
+go_binary(
+ name = "hasher",
+ embed = [":hasher_lib"],
+ visibility = ["//visibility:public"],
+)
diff --git a/gazelle/manifest/hasher/main.go b/gazelle/manifest/hasher/main.go
new file mode 100644
index 0000000..6e88335
--- /dev/null
+++ b/gazelle/manifest/hasher/main.go
@@ -0,0 +1,30 @@
+package main
+
+import (
+ "crypto/sha256"
+ "io"
+ "log"
+ "os"
+)
+
+func main() {
+ h := sha256.New()
+ out, err := os.Create(os.Args[1])
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer out.Close()
+ for _, filename := range os.Args[2:] {
+ f, err := os.Open(filename)
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer f.Close()
+ if _, err := io.Copy(h, f); err != nil {
+ log.Fatal(err)
+ }
+ }
+ if _, err := out.Write(h.Sum(nil)); err != nil {
+ log.Fatal(err)
+ }
+}
diff --git a/gazelle/manifest/manifest.go b/gazelle/manifest/manifest.go
index e19162b..640effc 100644
--- a/gazelle/manifest/manifest.go
+++ b/gazelle/manifest/manifest.go
@@ -26,12 +26,8 @@
}
// Encode encodes the manifest file to the given writer.
-func (f *File) Encode(w io.Writer, requirementsPath string) error {
- requirementsChecksum, err := sha256File(requirementsPath)
- if err != nil {
- return fmt.Errorf("failed to encode manifest file: %w", err)
- }
- integrityBytes, err := f.calculateIntegrity(requirementsChecksum)
+func (f *File) Encode(w io.Writer, manifestGeneratorHashFile, requirements io.Reader) error {
+ integrityBytes, err := f.calculateIntegrity(manifestGeneratorHashFile, requirements)
if err != nil {
return fmt.Errorf("failed to encode manifest file: %w", err)
}
@@ -45,12 +41,8 @@
}
// VerifyIntegrity verifies if the integrity set in the File is valid.
-func (f *File) VerifyIntegrity(requirementsPath string) (bool, error) {
- requirementsChecksum, err := sha256File(requirementsPath)
- if err != nil {
- return false, fmt.Errorf("failed to verify integrity: %w", err)
- }
- integrityBytes, err := f.calculateIntegrity(requirementsChecksum)
+func (f *File) VerifyIntegrity(manifestGeneratorHashFile, requirements io.Reader) (bool, error) {
+ integrityBytes, err := f.calculateIntegrity(manifestGeneratorHashFile, requirements)
if err != nil {
return false, fmt.Errorf("failed to verify integrity: %w", err)
}
@@ -62,7 +54,9 @@
// provided checksum for the requirements.txt file used as input to the modules
// mapping, plus the manifest structure in the manifest file. This integrity
// calculation ensures the manifest files are kept up-to-date.
-func (f *File) calculateIntegrity(requirementsChecksum []byte) ([]byte, error) {
+func (f *File) calculateIntegrity(
+ manifestGeneratorHash, requirements io.Reader,
+) ([]byte, error) {
hash := sha256.New()
// Sum the manifest part of the file.
@@ -72,8 +66,13 @@
return nil, fmt.Errorf("failed to calculate integrity: %w", err)
}
+ // Sum the manifest generator checksum bytes.
+ if _, err := io.Copy(hash, manifestGeneratorHash); err != nil {
+ return nil, fmt.Errorf("failed to calculate integrity: %w", err)
+ }
+
// Sum the requirements.txt checksum bytes.
- if _, err := hash.Write(requirementsChecksum); err != nil {
+ if _, err := io.Copy(hash, requirements); err != nil {
return nil, fmt.Errorf("failed to calculate integrity: %w", err)
}
@@ -134,19 +133,3 @@
// The incremental property of pip_repository.
Incremental bool
}
-
-// sha256File calculates the checksum of a given file path.
-func sha256File(filePath string) ([]byte, error) {
- file, err := os.Open(filePath)
- if err != nil {
- return nil, fmt.Errorf("failed to calculate sha256 sum for file: %w", err)
- }
- defer file.Close()
-
- hash := sha256.New()
- if _, err := io.Copy(hash, file); err != nil {
- return nil, fmt.Errorf("failed to calculate sha256 sum for file: %w", err)
- }
-
- return hash.Sum(nil), nil
-}
diff --git a/gazelle/manifest/manifest_test.go b/gazelle/manifest/manifest_test.go
index 3b50fd1..174d999 100644
--- a/gazelle/manifest/manifest_test.go
+++ b/gazelle/manifest/manifest_test.go
@@ -4,7 +4,9 @@
"bytes"
"io/ioutil"
"log"
+ "os"
"reflect"
+ "strings"
"testing"
"github.com/bazelbuild/rules_python/gazelle/manifest"
@@ -31,7 +33,14 @@
PipDepsRepositoryName: pipDepsRepositoryName,
})
var b bytes.Buffer
- if err := f.Encode(&b, "testdata/requirements.txt"); err != nil {
+ manifestGeneratorHashFile := strings.NewReader("")
+ requirements, err := os.Open("testdata/requirements.txt")
+ if err != nil {
+ log.Println(err)
+ t.FailNow()
+ }
+ defer requirements.Close()
+ if err := f.Encode(&b, manifestGeneratorHashFile, requirements); err != nil {
log.Println(err)
t.FailNow()
}
@@ -66,7 +75,14 @@
log.Println(err)
t.FailNow()
}
- valid, err := f.VerifyIntegrity("testdata/requirements.txt")
+ manifestGeneratorHashFile := strings.NewReader("")
+ requirements, err := os.Open("testdata/requirements.txt")
+ if err != nil {
+ log.Println(err)
+ t.FailNow()
+ }
+ defer requirements.Close()
+ valid, err := f.VerifyIntegrity(manifestGeneratorHashFile, requirements)
if err != nil {
log.Println(err)
t.FailNow()
diff --git a/gazelle/manifest/test/run.sh b/gazelle/manifest/test/run.sh
index 4b24b51..524e9b5 100755
--- a/gazelle/manifest/test/run.sh
+++ b/gazelle/manifest/test/run.sh
@@ -5,4 +5,7 @@
set -o errexit -o nounset
-"${_TEST_BINARY}" --requirements "${_TEST_REQUIREMENTS}" --manifest "${_TEST_MANIFEST}"
\ No newline at end of file
+"${_TEST_BINARY}" \
+ --manifest-generator-hash "${_TEST_MANIFEST_GENERATOR_HASH}" \
+ --requirements "${_TEST_REQUIREMENTS}" \
+ --manifest "${_TEST_MANIFEST}"
diff --git a/gazelle/manifest/test/test.go b/gazelle/manifest/test/test.go
index 518fe06..8b580b1 100644
--- a/gazelle/manifest/test/test.go
+++ b/gazelle/manifest/test/test.go
@@ -10,15 +10,23 @@
import (
"flag"
"log"
+ "os"
"path/filepath"
"github.com/bazelbuild/rules_python/gazelle/manifest"
)
func main() {
+ var manifestGeneratorHashPath string
var requirementsPath string
var manifestPath string
flag.StringVar(
+ &manifestGeneratorHashPath,
+ "manifest-generator-hash",
+ "",
+ "The file containing the hash for the source code of the manifest generator."+
+ "This is important to force manifest updates when the generator logic changes.")
+ flag.StringVar(
&requirementsPath,
"requirements",
"",
@@ -47,7 +55,19 @@
log.Fatalln("ERROR: failed to find the Gazelle manifest file integrity")
}
- valid, err := manifestFile.VerifyIntegrity(requirementsPath)
+ manifestGeneratorHash, err := os.Open(manifestGeneratorHashPath)
+ if err != nil {
+ log.Fatalf("ERROR: %v\n", err)
+ }
+ defer manifestGeneratorHash.Close()
+
+ requirements, err := os.Open(requirementsPath)
+ if err != nil {
+ log.Fatalf("ERROR: %v\n", err)
+ }
+ defer requirements.Close()
+
+ valid, err := manifestFile.VerifyIntegrity(manifestGeneratorHash, requirements)
if err != nil {
log.Fatalf("ERROR: %v\n", err)
}
@@ -60,4 +80,4 @@
"ERROR: %q is out-of-date, follow the intructions on this file for updating.\n",
manifestRealpath)
}
-}
\ No newline at end of file
+}
diff --git a/gazelle/manifest/testdata/gazelle_python.yaml b/gazelle/manifest/testdata/gazelle_python.yaml
index 4dc1f2c..70f7aff 100644
--- a/gazelle/manifest/testdata/gazelle_python.yaml
+++ b/gazelle/manifest/testdata/gazelle_python.yaml
@@ -10,4 +10,4 @@
arrow.parser: arrow
arrow.util: arrow
pip_deps_repository_name: test_repository_name
-integrity: 624f5f6c078eb44b907efd5a64e308354ac3620c568232b815668bcdf3e3366a
+integrity: eedf187f8b7ec27cdfc682feee4206e063b51d13d78f77c05d3a30ec11bd7411
diff --git a/gazelle/modules_mapping/def.bzl b/gazelle/modules_mapping/def.bzl
index 04ea50f..9b1352c 100644
--- a/gazelle/modules_mapping/def.bzl
+++ b/gazelle/modules_mapping/def.bzl
@@ -12,8 +12,9 @@
def _modules_mapping_impl(ctx):
modules_mapping = ctx.actions.declare_file(ctx.attr.modules_mapping_name)
args = ctx.actions.args()
- args.add(modules_mapping.path)
- args.add_all([whl.path for whl in ctx.files.wheels])
+ args.add("--output_file", modules_mapping.path)
+ args.add_all("--exclude_patterns", ctx.attr.exclude_patterns)
+ args.add_all("--wheels", [whl.path for whl in ctx.files.wheels])
ctx.actions.run(
inputs = ctx.files.wheels,
outputs = [modules_mapping],
@@ -26,6 +27,11 @@
modules_mapping = rule(
_modules_mapping_impl,
attrs = {
+ "exclude_patterns": attr.string_list(
+ default = ["^_|(\\._)+"],
+ doc = "A set of regex patterns to match against each calculated module path. By default, exclude the modules starting with underscores.",
+ mandatory = False,
+ ),
"modules_mapping_name": attr.string(
default = "modules_mapping.json",
doc = "The name for the output JSON file.",
diff --git a/gazelle/modules_mapping/generator.py b/gazelle/modules_mapping/generator.py
index ec3133a..51b81e7 100644
--- a/gazelle/modules_mapping/generator.py
+++ b/gazelle/modules_mapping/generator.py
@@ -1,5 +1,7 @@
+import argparse
import json
import pathlib
+import re
import sys
import zipfile
@@ -8,36 +10,69 @@
class Generator:
stderr = None
output_file = None
+ excluded_patterns = None
+ mapping = {}
- def __init__(self, stderr, output_file):
+ def __init__(self, stderr, output_file, excluded_patterns):
self.stderr = stderr
self.output_file = output_file
+ self.excluded_patterns = [re.compile(pattern) for pattern in excluded_patterns]
# dig_wheel analyses the wheel .whl file determining the modules it provides
# by looking at the directory structure.
def dig_wheel(self, whl):
- mapping = {}
with zipfile.ZipFile(whl, "r") as zip_file:
for path in zip_file.namelist():
if is_metadata(path):
if data_has_purelib_or_platlib(path):
- module_for_path(path, whl, mapping)
+ self.module_for_path(path, whl)
else:
continue
else:
- module_for_path(path, whl, mapping)
- return mapping
+ self.module_for_path(path, whl)
+
+ def module_for_path(self, path, whl):
+ ext = pathlib.Path(path).suffix
+ if ext == ".py" or ext == ".so":
+ if "purelib" in path or "platlib" in path:
+ root = "/".join(path.split("/")[2:])
+ else:
+ root = path
+
+ wheel_name = get_wheel_name(whl)
+
+ if root.endswith("/__init__.py"):
+ # Note the '/' here means that the __init__.py is not in the
+ # root of the wheel, therefore we can index the directory
+ # where this file is as an importable package.
+ module = root[: -len("/__init__.py")].replace("/", ".")
+ if not self.is_excluded(module):
+ self.mapping[module] = wheel_name
+
+ # Always index the module file.
+ if ext == ".so":
+ # Also remove extra metadata that is embedded as part of
+ # the file name as an extra extension.
+ ext = "".join(pathlib.Path(root).suffixes)
+ module = root[: -len(ext)].replace("/", ".")
+ if not self.is_excluded(module):
+ self.mapping[module] = wheel_name
+
+ def is_excluded(self, module):
+ for pattern in self.excluded_patterns:
+ if pattern.search(module):
+ return True
+ return False
# run is the entrypoint for the generator.
def run(self, wheels):
- mapping = {}
for whl in wheels:
try:
- mapping.update(self.dig_wheel(whl))
+ self.dig_wheel(whl)
except AssertionError as error:
print(error, file=self.stderr)
return 1
- mapping_json = json.dumps(mapping)
+ mapping_json = json.dumps(self.mapping)
with open(self.output_file, "w") as f:
f.write(mapping_json)
return 0
@@ -71,34 +106,14 @@
return is_metadata(path) and (maybe_lib == "purelib" or maybe_lib == "platlib")
-def module_for_path(path, whl, mapping):
- ext = pathlib.Path(path).suffix
- if ext == ".py" or ext == ".so":
- if "purelib" in path or "platlib" in path:
- root = "/".join(path.split("/")[2:])
- else:
- root = path
-
- wheel_name = get_wheel_name(whl)
-
- if root.endswith("/__init__.py"):
- # Note the '/' here means that the __init__.py is not in the
- # root of the wheel, therefore we can index the directory
- # where this file is as an importable package.
- module = root[: -len("/__init__.py")].replace("/", ".")
- mapping[module] = wheel_name
-
- # Always index the module file.
- if ext == ".so":
- # Also remove extra metadata that is embeded as part of
- # the file name as an extra extension.
- ext = "".join(pathlib.Path(root).suffixes)
- module = root[: -len(ext)].replace("/", ".")
- mapping[module] = wheel_name
-
-
if __name__ == "__main__":
- output_file = sys.argv[1]
- wheels = sys.argv[2:]
- generator = Generator(sys.stderr, output_file)
- exit(generator.run(wheels))
+ parser = argparse.ArgumentParser(
+ prog="generator",
+ description="Generates the modules mapping used by the Gazelle manifest.",
+ )
+ parser.add_argument("--output_file", type=str)
+ parser.add_argument("--exclude_patterns", nargs="+", default=[])
+ parser.add_argument("--wheels", nargs="+", default=[])
+ args = parser.parse_args()
+ generator = Generator(sys.stderr, args.output_file, args.exclude_patterns)
+ exit(generator.run(args.wheels))