feat(gazelle): pure golang helper (#1895)
Remove the gazelle plugin's Python dependencies and make it hermetic, so
it no longer relies on the system interpreter.
Use tree-sitter to parse Python code, and use
https://github.com/pypi/stdlib-list to determine whether a module is in
the standard library.
Fixes #1825
Fixes #1599
Related #1315
diff --git a/CHANGELOG.md b/CHANGELOG.md
index af97798..63ece30 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -30,6 +30,10 @@
marked as `reproducible` and will not include any lock file entries from now
on.
+* (gazelle) Remove the gazelle plugin's Python deps and make it hermetic.
+ Introduce a new Go-based helper that uses tree-sitter for syntax analysis.
+ Use `pypi/stdlib-list` to check whether a module is in the standard library.
+
### Fixed
* (gazelle) Remove `visibility` from `NonEmptyAttr`.
Now empty(have no `deps/main/srcs/imports` attr) `py_library/test/binary` rules will
diff --git a/gazelle/BUILD.bazel b/gazelle/BUILD.bazel
index e00c74a..f74338d 100644
--- a/gazelle/BUILD.bazel
+++ b/gazelle/BUILD.bazel
@@ -1,4 +1,4 @@
-load("@bazel_gazelle//:def.bzl", "DEFAULT_LANGUAGES", "gazelle", "gazelle_binary")
+load("@bazel_gazelle//:def.bzl", "gazelle")
# Gazelle configuration options.
# See https://github.com/bazelbuild/bazel-gazelle#running-gazelle-with-bazel
@@ -6,19 +6,13 @@
# gazelle:exclude bazel-out
gazelle(
name = "gazelle",
- gazelle = ":gazelle_binary",
-)
-
-gazelle_binary(
- name = "gazelle_binary",
- languages = DEFAULT_LANGUAGES + ["//python"],
)
gazelle(
name = "gazelle_update_repos",
args = [
"-from_file=go.mod",
- "-to_macro=deps.bzl%gazelle_deps",
+ "-to_macro=deps.bzl%go_deps",
"-prune",
],
command = "update-repos",
diff --git a/gazelle/MODULE.bazel b/gazelle/MODULE.bazel
index 6ae7719..1829d24 100644
--- a/gazelle/MODULE.bazel
+++ b/gazelle/MODULE.bazel
@@ -9,6 +9,11 @@
bazel_dep(name = "rules_go", version = "0.41.0", repo_name = "io_bazel_rules_go")
bazel_dep(name = "gazelle", version = "0.33.0", repo_name = "bazel_gazelle")
+local_path_override(
+ module_name = "rules_python",
+ path = "..",
+)
+
go_deps = use_extension("@bazel_gazelle//:extensions.bzl", "go_deps")
go_deps.from_file(go_mod = "//:go.mod")
use_repo(
@@ -17,5 +22,18 @@
"com_github_bmatcuk_doublestar_v4",
"com_github_emirpasic_gods",
"com_github_ghodss_yaml",
+ "com_github_smacker_go_tree_sitter",
+ "com_github_stretchr_testify",
"in_gopkg_yaml_v2",
+ "org_golang_x_sync",
+)
+
+python_stdlib_list = use_extension("//python:extensions.bzl", "python_stdlib_list")
+use_repo(
+ python_stdlib_list,
+ "python_stdlib_list_3_10",
+ "python_stdlib_list_3_11",
+ "python_stdlib_list_3_12",
+ "python_stdlib_list_3_8",
+ "python_stdlib_list_3_9",
)
diff --git a/gazelle/WORKSPACE b/gazelle/WORKSPACE
index df2883f..d9f0645 100644
--- a/gazelle/WORKSPACE
+++ b/gazelle/WORKSPACE
@@ -34,16 +34,11 @@
path = "..",
)
-load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains")
+load("@rules_python//python:repositories.bzl", "py_repositories")
py_repositories()
-python_register_toolchains(
- name = "python_3_11",
- python_version = "3.11",
-)
-
load("//:deps.bzl", _py_gazelle_deps = "gazelle_deps")
-# gazelle:repository_macro deps.bzl%gazelle_deps
+# gazelle:repository_macro deps.bzl%go_deps
_py_gazelle_deps()
diff --git a/gazelle/deps.bzl b/gazelle/deps.bzl
index d9d3881..f4f4c24 100644
--- a/gazelle/deps.bzl
+++ b/gazelle/deps.bzl
@@ -14,13 +14,54 @@
"This file managed by `bazel run //:gazelle_update_repos`"
-load("@bazel_gazelle//:deps.bzl", _go_repository = "go_repository")
+load(
+ "@bazel_gazelle//:deps.bzl",
+ _go_repository = "go_repository",
+)
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_file")
def go_repository(name, **kwargs):
if name not in native.existing_rules():
_go_repository(name = name, **kwargs)
+def python_stdlib_list_deps():
+ "Fetch python stdlib list dependencies"
+ http_file(
+ name = "python_stdlib_list_3_8",
+ sha256 = "ee6dc367011ff298b906dbaab408940aa57086d5f8f47278f4b7523b9aa13ae3",
+ url = "https://raw.githubusercontent.com/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.8.txt",
+ downloaded_file_path = "3.8.txt",
+ )
+ http_file(
+ name = "python_stdlib_list_3_9",
+ sha256 = "a4340e5ffe2e75bb18f548028cef6e6ac15384c44ae0a776e04dd869da1d1fd7",
+ url = "https://raw.githubusercontent.com/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.9.txt",
+ downloaded_file_path = "3.9.txt",
+ )
+ http_file(
+ name = "python_stdlib_list_3_10",
+ sha256 = "0b867738b78ac98944237de2600093a1c6ef259d1810017e46f01a29f3d199e7",
+ url = "https://raw.githubusercontent.com/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.10.txt",
+ downloaded_file_path = "3.10.txt",
+ )
+ http_file(
+ name = "python_stdlib_list_3_11",
+ sha256 = "3c1dbf991b17178d6ed3772f4fa8f64302feaf9c3385fef328a0c7ab736a79b1",
+ url = "https://raw.githubusercontent.com/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.11.txt",
+ downloaded_file_path = "3.11.txt",
+ )
+ http_file(
+ name = "python_stdlib_list_3_12",
+ sha256 = "6d3d53194218b43ee1d04bf9a4f0b6a9309bb59cdcaddede7d9cfe8b6835d34a",
+ url = "https://raw.githubusercontent.com/pypi/stdlib-list/8cbc2067a4a0f9eee57fb541e4cd7727724b7db4/stdlib_list/lists/3.12.txt",
+ downloaded_file_path = "3.12.txt",
+ )
+
def gazelle_deps():
+ go_deps()
+ python_stdlib_list_deps()
+
+def go_deps():
"Fetch go dependencies"
go_repository(
name = "co_honnef_go_tools",
@@ -28,13 +69,25 @@
sum = "h1:/hemPrYIhOhy8zYrNj+069zDB68us2sMGsfkFJO0iZs=",
version = "v0.0.0-20190523083050-ea95bdfd59fc",
)
+ go_repository(
+ name = "com_github_bazelbuild_bazel_gazelle",
+ importpath = "github.com/bazelbuild/bazel-gazelle",
+ sum = "h1:ROyUyUHzoEdvoOs1e0haxJx1l5EjZX6AOqiKdVlaBbg=",
+ version = "v0.31.1",
+ )
go_repository(
name = "com_github_bazelbuild_buildtools",
build_naming_convention = "go_default_library",
importpath = "github.com/bazelbuild/buildtools",
- sum = "h1:jhiMzJ+8unnLRtV8rpbWBFE9pFNzIqgUTyZU5aA++w8=",
- version = "v0.0.0-20221004120235-7186f635531b",
+ sum = "h1:HTepWP/jhtWTC1gvK0RnvKCgjh4gLqiwaOwGozAXcbw=",
+ version = "v0.0.0-20231103205921-433ea8554e82",
+ )
+ go_repository(
+ name = "com_github_bazelbuild_rules_go",
+ importpath = "github.com/bazelbuild/rules_go",
+ sum = "h1:JzlRxsFNhlX+g4drDRPhIaU5H5LnI978wdMJ0vK4I+k=",
+ version = "v0.41.0",
)
go_repository(
@@ -81,6 +134,13 @@
version = "v0.3.4",
)
go_repository(
+ name = "com_github_davecgh_go_spew",
+ importpath = "github.com/davecgh/go-spew",
+ sum = "h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=",
+ version = "v1.1.1",
+ )
+
+ go_repository(
name = "com_github_emirpasic_gods",
importpath = "github.com/emirpasic/gods",
sum = "h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=",
@@ -98,6 +158,12 @@
sum = "h1:EQciDnbrYxy13PgWoY8AqoxGiPrpgBZ1R8UNe3ddc+A=",
version = "v0.1.0",
)
+ go_repository(
+ name = "com_github_fsnotify_fsnotify",
+ importpath = "github.com/fsnotify/fsnotify",
+ sum = "h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=",
+ version = "v1.6.0",
+ )
go_repository(
name = "com_github_ghodss_yaml",
@@ -114,14 +180,14 @@
go_repository(
name = "com_github_golang_mock",
importpath = "github.com/golang/mock",
- sum = "h1:G5FRp8JnTd7RQH5kemVNlMeyXQAztQ3mOWV95KxsXH8=",
- version = "v1.1.1",
+ sum = "h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc=",
+ version = "v1.6.0",
)
go_repository(
name = "com_github_golang_protobuf",
importpath = "github.com/golang/protobuf",
- sum = "h1:JjCZWpVbqXDqFVmTfYWEVTMIYrL/NPdPSCHPJ0T/raM=",
- version = "v1.4.3",
+ sum = "h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw=",
+ version = "v1.5.2",
)
go_repository(
name = "com_github_google_go_cmp",
@@ -129,6 +195,12 @@
sum = "h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=",
version = "v0.5.9",
)
+ go_repository(
+ name = "com_github_pmezard_go_difflib",
+ importpath = "github.com/pmezard/go-difflib",
+ sum = "h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=",
+ version = "v1.0.0",
+ )
go_repository(
name = "com_github_prometheus_client_model",
@@ -137,6 +209,25 @@
version = "v0.0.0-20190812154241-14fe0d1b01d4",
)
go_repository(
+ name = "com_github_smacker_go_tree_sitter",
+ importpath = "github.com/smacker/go-tree-sitter",
+ sum = "h1:7QZKUmQfnxncZIJGyvX8M8YeMfn8kM10j3J/2KwVTN4=",
+ version = "v0.0.0-20240422154435-0628b34cbf9c",
+ )
+ go_repository(
+ name = "com_github_stretchr_objx",
+ importpath = "github.com/stretchr/objx",
+ sum = "h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=",
+ version = "v0.5.2",
+ )
+ go_repository(
+ name = "com_github_stretchr_testify",
+ importpath = "github.com/stretchr/testify",
+ sum = "h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=",
+ version = "v1.9.0",
+ )
+
+ go_repository(
name = "com_github_yuin_goldmark",
importpath = "github.com/yuin/goldmark",
sum = "h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=",
@@ -161,6 +252,13 @@
version = "v2.4.0",
)
go_repository(
+ name = "in_gopkg_yaml_v3",
+ importpath = "gopkg.in/yaml.v3",
+ sum = "h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=",
+ version = "v3.0.1",
+ )
+
+ go_repository(
name = "net_starlark_go",
importpath = "go.starlark.net",
sum = "h1:xwwDQW5We85NaTk2APgoN9202w/l0DVGp+GZMfsrh7s=",
@@ -181,14 +279,14 @@
go_repository(
name = "org_golang_google_grpc",
importpath = "google.golang.org/grpc",
- sum = "h1:rRYRFMVgRv6E0D70Skyfsr28tDXIuuPZyWGMPdMcnXg=",
- version = "v1.27.0",
+ sum = "h1:fPVVDxY9w++VjTZsYvXWqEf9Rqar/e+9zYfxKK+W+YU=",
+ version = "v1.50.0",
)
go_repository(
name = "org_golang_google_protobuf",
importpath = "google.golang.org/protobuf",
- sum = "h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c=",
- version = "v1.25.0",
+ sum = "h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=",
+ version = "v1.28.0",
)
go_repository(
name = "org_golang_x_crypto",
@@ -211,14 +309,14 @@
go_repository(
name = "org_golang_x_mod",
importpath = "golang.org/x/mod",
- sum = "h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s=",
- version = "v0.6.0-dev.0.20220419223038-86c51ed26bb4",
+ sum = "h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk=",
+ version = "v0.10.0",
)
go_repository(
name = "org_golang_x_net",
importpath = "golang.org/x/net",
- sum = "h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0=",
- version = "v0.0.0-20220722155237-a158d28d115b",
+ sum = "h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=",
+ version = "v0.10.0",
)
go_repository(
name = "org_golang_x_oauth2",
@@ -229,20 +327,20 @@
go_repository(
name = "org_golang_x_sync",
importpath = "golang.org/x/sync",
- sum = "h1:uVc8UZUe6tr40fFVnUP5Oj+veunVezqYl9z7DYw9xzw=",
- version = "v0.0.0-20220722155255-886fb9371eb4",
+ sum = "h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI=",
+ version = "v0.2.0",
)
go_repository(
name = "org_golang_x_sys",
importpath = "golang.org/x/sys",
- sum = "h1:k5II8e6QD8mITdi+okbbmR/cIyEbeXLBhy5Ha4nevyc=",
- version = "v0.0.0-20221010170243-090e33056c14",
+ sum = "h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=",
+ version = "v0.8.0",
)
go_repository(
name = "org_golang_x_text",
importpath = "golang.org/x/text",
- sum = "h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=",
- version = "v0.3.7",
+ sum = "h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=",
+ version = "v0.3.3",
)
go_repository(
name = "org_golang_x_tools",
@@ -250,8 +348,8 @@
"gazelle:exclude **/testdata/**/*",
],
importpath = "golang.org/x/tools",
- sum = "h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU=",
- version = "v0.1.12",
+ sum = "h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo=",
+ version = "v0.9.1",
)
go_repository(
name = "org_golang_x_xerrors",
diff --git a/gazelle/go.mod b/gazelle/go.mod
index b9b79ac..4b65e71 100644
--- a/gazelle/go.mod
+++ b/gazelle/go.mod
@@ -4,17 +4,23 @@
require (
github.com/bazelbuild/bazel-gazelle v0.31.1
- github.com/bazelbuild/buildtools v0.0.0-20230510134650-37bd1811516d
+ github.com/bazelbuild/buildtools v0.0.0-20231103205921-433ea8554e82
github.com/bazelbuild/rules_go v0.41.0
github.com/bmatcuk/doublestar/v4 v4.6.1
github.com/emirpasic/gods v1.18.1
github.com/ghodss/yaml v1.0.0
+ github.com/smacker/go-tree-sitter v0.0.0-20240422154435-0628b34cbf9c
+ github.com/stretchr/testify v1.9.0
+ golang.org/x/sync v0.2.0
gopkg.in/yaml.v2 v2.4.0
)
require (
+ github.com/davecgh/go-spew v1.1.1 // indirect
github.com/google/go-cmp v0.5.9 // indirect
+ github.com/pmezard/go-difflib v1.0.0 // indirect
golang.org/x/mod v0.10.0 // indirect
golang.org/x/sys v0.8.0 // indirect
golang.org/x/tools v0.9.1 // indirect
+ gopkg.in/yaml.v3 v3.0.1 // indirect
)
diff --git a/gazelle/go.sum b/gazelle/go.sum
index fcfcb28..46e0127 100644
--- a/gazelle/go.sum
+++ b/gazelle/go.sum
@@ -2,8 +2,8 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/bazelbuild/bazel-gazelle v0.31.1 h1:ROyUyUHzoEdvoOs1e0haxJx1l5EjZX6AOqiKdVlaBbg=
github.com/bazelbuild/bazel-gazelle v0.31.1/go.mod h1:Ul0pqz50f5wxz0QNzsZ+mrEu4AVAVJZEB5xLnHgIG9c=
-github.com/bazelbuild/buildtools v0.0.0-20230510134650-37bd1811516d h1:Fl1FfItZp34QIQmmDTbZXHB5XA6JfbNNfH7tRRGWvQo=
-github.com/bazelbuild/buildtools v0.0.0-20230510134650-37bd1811516d/go.mod h1:689QdV3hBP7Vo9dJMmzhoYIyo/9iMhEmHkJcnaPRCbo=
+github.com/bazelbuild/buildtools v0.0.0-20231103205921-433ea8554e82 h1:HTepWP/jhtWTC1gvK0RnvKCgjh4gLqiwaOwGozAXcbw=
+github.com/bazelbuild/buildtools v0.0.0-20231103205921-433ea8554e82/go.mod h1:689QdV3hBP7Vo9dJMmzhoYIyo/9iMhEmHkJcnaPRCbo=
github.com/bazelbuild/rules_go v0.41.0 h1:JzlRxsFNhlX+g4drDRPhIaU5H5LnI978wdMJ0vK4I+k=
github.com/bazelbuild/rules_go v0.41.0/go.mod h1:TMHmtfpvyfsxaqfL9WnahCsXMWDMICTw7XeK9yVb+YU=
github.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwNy7PA4I=
@@ -13,6 +13,9 @@
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
@@ -38,7 +41,17 @@
github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+github.com/smacker/go-tree-sitter v0.0.0-20240422154435-0628b34cbf9c h1:7QZKUmQfnxncZIJGyvX8M8YeMfn8kM10j3J/2KwVTN4=
+github.com/smacker/go-tree-sitter v0.0.0-20240422154435-0628b34cbf9c/go.mod h1:q99oHDsbP0xRwmn7Vmob8gbSMNyvJ83OauXPSuHQuKE=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.4/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
go.starlark.net v0.0.0-20210223155950-e043a3d3c984/go.mod h1:t3mmBBPzAVvK0L0n1drDmrQsJ8FoIx4INCqVMTr/Zo0=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
@@ -55,6 +68,8 @@
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.2.0 h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI=
+golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -90,5 +105,8 @@
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
diff --git a/gazelle/python/BUILD.bazel b/gazelle/python/BUILD.bazel
index 4cca8b3..195c776 100644
--- a/gazelle/python/BUILD.bazel
+++ b/gazelle/python/BUILD.bazel
@@ -1,31 +1,31 @@
load("@bazel_gazelle//:def.bzl", "gazelle_binary")
-load("@io_bazel_rules_go//go:def.bzl", "go_library")
-load("@rules_python//python:defs.bzl", "py_binary", "py_test")
+load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
load(":gazelle_test.bzl", "gazelle_test")
go_library(
name = "python",
srcs = [
"configure.go",
+ "file_parser.go",
"fix.go",
"generate.go",
"kinds.go",
"language.go",
- "lifecycle.go",
"parser.go",
"resolve.go",
"std_modules.go",
"target.go",
],
# NOTE @aignas 2023-12-03: currently gazelle does not support embedding
- # generated files, but helper.zip is generated by a build rule.
+ # generated files, but stdlib_list.txt is generated by a build rule.
#
# You will get a benign error like when running gazelle locally:
- # > 8 gazelle: .../rules_python/gazelle/python/lifecycle.go:26:3: pattern helper.zip: matched no files
+ # > 8 gazelle: .../rules_python/gazelle/python/std_modules.go:24:3: pattern stdlib_list.txt: matched no files
#
# See following for more info:
# https://github.com/bazelbuild/bazel-gazelle/issues/1513
- embedsrcs = [":helper.zip"], # keep
+ embedsrcs = ["stdlib_list.txt"], # keep # TODO: use user-defined version?
importpath = "github.com/bazelbuild/rules_python/gazelle/python",
visibility = ["//visibility:public"],
deps = [
@@ -42,35 +42,27 @@
"@com_github_emirpasic_gods//lists/singlylinkedlist",
"@com_github_emirpasic_gods//sets/treeset",
"@com_github_emirpasic_gods//utils",
+ "@com_github_smacker_go_tree_sitter//:go-tree-sitter",
+ "@com_github_smacker_go_tree_sitter//python",
+ "@org_golang_x_sync//errgroup",
],
)
-py_binary(
- name = "helper",
- srcs = [
- "__main__.py",
- "parse.py",
- "std_modules.py",
- ],
- # This is to make sure that the current directory is added to PYTHONPATH
- imports = ["."],
- main = "__main__.py",
- visibility = ["//visibility:public"],
-)
-
-py_test(
- name = "parse_test",
- srcs = [
- "parse.py",
- "parse_test.py",
- ],
- imports = ["."],
-)
-
-filegroup(
- name = "helper.zip",
- srcs = [":helper"],
- output_group = "python_zip_file",
+copy_file(
+ name = "stdlib_list",
+ src = select(
+ {
+ "@rules_python//python/config_settings:is_python_3.10": "@python_stdlib_list_3_10//file",
+ "@rules_python//python/config_settings:is_python_3.11": "@python_stdlib_list_3_11//file",
+ "@rules_python//python/config_settings:is_python_3.12": "@python_stdlib_list_3_12//file",
+ "@rules_python//python/config_settings:is_python_3.8": "@python_stdlib_list_3_8//file",
+ "@rules_python//python/config_settings:is_python_3.9": "@python_stdlib_list_3_9//file",
+ # Default to the Python 3.11 list, which matches the previous behaviour.
+ "//conditions:default": "@python_stdlib_list_3_11//file",
+ },
+ ),
+ out = "stdlib_list.txt",
+ allow_symlink = True,
)
# gazelle:exclude testdata/
@@ -80,7 +72,6 @@
srcs = ["python_test.go"],
data = [
":gazelle_binary",
- ":helper",
],
test_dirs = glob(
# Use this so that we don't need to manually maintain the list.
@@ -109,3 +100,15 @@
srcs = glob(["**"]),
visibility = ["//:__pkg__"],
)
+
+go_test(
+ name = "default_test",
+ srcs = [
+ "file_parser_test.go",
+ "std_modules_test.go",
+ ],
+ embed = [":python"],
+ deps = [
+ "@com_github_stretchr_testify//assert",
+ ],
+)
diff --git a/gazelle/python/__main__.py b/gazelle/python/__main__.py
deleted file mode 100644
index 9974c66..0000000
--- a/gazelle/python/__main__.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# Copyright 2023 The Bazel Authors. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# parse.py is a long-living program that communicates over STDIN and STDOUT.
-# STDIN receives parse requests, one per line. It outputs the parsed modules and
-# comments from all the files from each request.
-
-import sys
-
-import parse
-import std_modules
-
-if __name__ == "__main__":
- if len(sys.argv) < 2:
- sys.exit("Please provide subcommand, either parse or std_modules")
- if sys.argv[1] == "parse":
- sys.exit(parse.main(sys.stdin, sys.stdout))
- elif sys.argv[1] == "std_modules":
- sys.exit(std_modules.main(sys.stdin, sys.stdout))
- else:
- sys.exit("Unknown subcommand: " + sys.argv[1])
diff --git a/gazelle/python/extensions.bzl b/gazelle/python/extensions.bzl
new file mode 100644
index 0000000..8d339c0
--- /dev/null
+++ b/gazelle/python/extensions.bzl
@@ -0,0 +1,5 @@
+"python_stdlib_list module extension for use with bzlmod"
+
+load("//python/private:extensions.bzl", _python_stdlib_list = "python_stdlib_list")
+
+python_stdlib_list = _python_stdlib_list
diff --git a/gazelle/python/file_parser.go b/gazelle/python/file_parser.go
new file mode 100644
index 0000000..a2b22c2
--- /dev/null
+++ b/gazelle/python/file_parser.go
@@ -0,0 +1,201 @@
+// Copyright 2023 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package python
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "path/filepath"
+ "strings"
+
+ sitter "github.com/smacker/go-tree-sitter"
+ "github.com/smacker/go-tree-sitter/python"
+)
+
+const (
+ sitterNodeTypeString = "string"
+ sitterNodeTypeComment = "comment"
+ sitterNodeTypeIdentifier = "identifier"
+ sitterNodeTypeDottedName = "dotted_name"
+ sitterNodeTypeIfStatement = "if_statement"
+ sitterNodeTypeAliasedImport = "aliased_import"
+ sitterNodeTypeWildcardImport = "wildcard_import"
+ sitterNodeTypeImportStatement = "import_statement"
+ sitterNodeTypeComparisonOperator = "comparison_operator"
+ sitterNodeTypeImportFromStatement = "import_from_statement"
+)
+
+type ParserOutput struct {
+ FileName string
+ Modules []module
+ Comments []comment
+ HasMain bool
+}
+
+type FileParser struct {
+ code []byte
+ relFilepath string
+ output ParserOutput
+}
+
+func NewFileParser() *FileParser {
+ return &FileParser{}
+}
+
+func ParseCode(code []byte) (*sitter.Node, error) {
+ parser := sitter.NewParser()
+ parser.SetLanguage(python.GetLanguage())
+
+ tree, err := parser.ParseCtx(context.Background(), nil, code)
+ if err != nil {
+ return nil, err
+ }
+
+ return tree.RootNode(), nil
+}
+
+func (p *FileParser) parseMain(ctx context.Context, node *sitter.Node) bool {
+ for i := 0; i < int(node.ChildCount()); i++ {
+ if err := ctx.Err(); err != nil {
+ return false
+ }
+ child := node.Child(i)
+ if child.Type() == sitterNodeTypeIfStatement &&
+ child.Child(1).Type() == sitterNodeTypeComparisonOperator && child.Child(1).Child(1).Type() == "==" {
+ statement := child.Child(1)
+ a, b := statement.Child(0), statement.Child(2)
+ // convert "'__main__' == __name__" to "__name__ == '__main__'"
+ if b.Type() == sitterNodeTypeIdentifier {
+ a, b = b, a
+ }
+ if a.Type() == sitterNodeTypeIdentifier && a.Content(p.code) == "__name__" &&
+ // In github.com/smacker/go-tree-sitter@latest (newer than the v0.0.0-20240422154435-0628b34cbf9c we use),
+ // "__main__" is the second child of b, but in the version we use it is not, so strip the quotes by slicing.
+ // We cannot use the latest go-tree-sitter because of the top-level reference in scanner.c:
+ // https://github.com/smacker/go-tree-sitter/blob/04d6b33fe138a98075210f5b770482ded024dc0f/python/scanner.c#L1
+ b.Type() == sitterNodeTypeString && string(p.code[b.StartByte()+1:b.EndByte()-1]) == "__main__" {
+ return true
+ }
+ }
+ }
+ return false
+}
+
+func parseImportStatement(node *sitter.Node, code []byte) (module, bool) {
+ switch node.Type() {
+ case sitterNodeTypeDottedName:
+ return module{
+ Name: node.Content(code),
+ LineNumber: node.StartPoint().Row + 1,
+ }, true
+ case sitterNodeTypeAliasedImport:
+ return parseImportStatement(node.Child(0), code)
+ case sitterNodeTypeWildcardImport:
+ return module{
+ Name: "*",
+ LineNumber: node.StartPoint().Row + 1,
+ }, true
+ }
+ return module{}, false
+}
+
+func (p *FileParser) parseImportStatements(node *sitter.Node) bool {
+ if node.Type() == sitterNodeTypeImportStatement {
+ for j := 1; j < int(node.ChildCount()); j++ {
+ m, ok := parseImportStatement(node.Child(j), p.code)
+ if !ok {
+ continue
+ }
+ m.Filepath = p.relFilepath
+ if strings.HasPrefix(m.Name, ".") {
+ continue
+ }
+ p.output.Modules = append(p.output.Modules, m)
+ }
+ } else if node.Type() == sitterNodeTypeImportFromStatement {
+ from := node.Child(1).Content(p.code)
+ if strings.HasPrefix(from, ".") {
+ return true
+ }
+ for j := 3; j < int(node.ChildCount()); j++ {
+ m, ok := parseImportStatement(node.Child(j), p.code)
+ if !ok {
+ continue
+ }
+ m.Filepath = p.relFilepath
+ m.From = from
+ m.Name = fmt.Sprintf("%s.%s", from, m.Name)
+ p.output.Modules = append(p.output.Modules, m)
+ }
+ } else {
+ return false
+ }
+ return true
+}
+
+func (p *FileParser) parseComments(node *sitter.Node) bool {
+ if node.Type() == sitterNodeTypeComment {
+ p.output.Comments = append(p.output.Comments, comment(node.Content(p.code)))
+ return true
+ }
+ return false
+}
+
+func (p *FileParser) SetCodeAndFile(code []byte, relPackagePath, filename string) {
+ p.code = code
+ p.relFilepath = filepath.Join(relPackagePath, filename)
+ p.output.FileName = filename
+}
+
+func (p *FileParser) parse(ctx context.Context, node *sitter.Node) {
+ if node == nil {
+ return
+ }
+ for i := 0; i < int(node.ChildCount()); i++ {
+ if err := ctx.Err(); err != nil {
+ return
+ }
+ child := node.Child(i)
+ if p.parseImportStatements(child) {
+ continue
+ }
+ if p.parseComments(child) {
+ continue
+ }
+ p.parse(ctx, child)
+ }
+}
+
+func (p *FileParser) Parse(ctx context.Context) (*ParserOutput, error) {
+ rootNode, err := ParseCode(p.code)
+ if err != nil {
+ return nil, err
+ }
+
+ p.output.HasMain = p.parseMain(ctx, rootNode)
+
+ p.parse(ctx, rootNode)
+ return &p.output, nil
+}
+
+func (p *FileParser) ParseFile(ctx context.Context, repoRoot, relPackagePath, filename string) (*ParserOutput, error) {
+ code, err := os.ReadFile(filepath.Join(repoRoot, relPackagePath, filename))
+ if err != nil {
+ return nil, err
+ }
+ p.SetCodeAndFile(code, relPackagePath, filename)
+ return p.Parse(ctx)
+}
diff --git a/gazelle/python/file_parser_test.go b/gazelle/python/file_parser_test.go
new file mode 100644
index 0000000..3682cff
--- /dev/null
+++ b/gazelle/python/file_parser_test.go
@@ -0,0 +1,256 @@
+// Copyright 2023 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package python
+
+import (
+ "context"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestParseImportStatements(t *testing.T) {
+ t.Parallel()
+ units := []struct {
+ name string
+ code string
+ filepath string
+ result []module
+ }{
+ {
+ name: "not has import",
+ code: "a = 1\nb = 2",
+ filepath: "",
+ result: nil,
+ },
+ {
+ name: "has import",
+ code: "import unittest\nimport os.path\nfrom foo.bar import abc.xyz",
+ filepath: "abc.py",
+ result: []module{
+ {
+ Name: "unittest",
+ LineNumber: 1,
+ Filepath: "abc.py",
+ From: "",
+ },
+ {
+ Name: "os.path",
+ LineNumber: 2,
+ Filepath: "abc.py",
+ From: "",
+ },
+ {
+ Name: "foo.bar.abc.xyz",
+ LineNumber: 3,
+ Filepath: "abc.py",
+ From: "foo.bar",
+ },
+ },
+ },
+ {
+ name: "has import in def",
+ code: `def foo():
+ import unittest
+`,
+ filepath: "abc.py",
+ result: []module{
+ {
+ Name: "unittest",
+ LineNumber: 2,
+ Filepath: "abc.py",
+ From: "",
+ },
+ },
+ },
+ {
+ name: "invalid syntax",
+ code: "import os\nimport",
+ filepath: "abc.py",
+ result: []module{
+ {
+ Name: "os",
+ LineNumber: 1,
+ Filepath: "abc.py",
+ From: "",
+ },
+ },
+ },
+ {
+ name: "import as",
+ code: "import os as b\nfrom foo import bar as c# 123",
+ filepath: "abc.py",
+ result: []module{
+ {
+ Name: "os",
+ LineNumber: 1,
+ Filepath: "abc.py",
+ From: "",
+ },
+ {
+ Name: "foo.bar",
+ LineNumber: 2,
+ Filepath: "abc.py",
+ From: "foo",
+ },
+ },
+ },
+ // Aligned with https://docs.python.org/3/reference/simple_stmts.html#index-34
+ {
+ name: "complex import",
+ code: "from unittest import *\nfrom foo import (bar as c, baz, qux as d)\nfrom . import abc",
+ result: []module{
+ {
+ Name: "unittest.*",
+ LineNumber: 1,
+ From: "unittest",
+ },
+ {
+ Name: "foo.bar",
+ LineNumber: 2,
+ From: "foo",
+ },
+ {
+ Name: "foo.baz",
+ LineNumber: 2,
+ From: "foo",
+ },
+ {
+ Name: "foo.qux",
+ LineNumber: 2,
+ From: "foo",
+ },
+ },
+ },
+ }
+ for _, u := range units {
+ t.Run(u.name, func(t *testing.T) {
+ p := NewFileParser()
+ code := []byte(u.code)
+ p.SetCodeAndFile(code, "", u.filepath)
+ output, err := p.Parse(context.Background())
+ assert.NoError(t, err)
+ assert.Equal(t, u.result, output.Modules)
+ })
+ }
+}
+
+func TestParseComments(t *testing.T) {
+ t.Parallel()
+ units := []struct {
+ name string
+ code string
+ result []comment
+ }{
+ {
+ name: "not has comment",
+ code: "a = 1\nb = 2",
+ result: nil,
+ },
+ {
+ name: "has comment",
+ code: "# a = 1\n# b = 2",
+ result: []comment{"# a = 1", "# b = 2"},
+ },
+ {
+ name: "has comment in if",
+ code: "if True:\n # a = 1\n # b = 2",
+ result: []comment{"# a = 1", "# b = 2"},
+ },
+ {
+ name: "has comment inline",
+ code: "import os# 123\nfrom pathlib import Path as b#456",
+ result: []comment{"# 123", "#456"},
+ },
+ }
+ for _, u := range units {
+ t.Run(u.name, func(t *testing.T) {
+ p := NewFileParser()
+ code := []byte(u.code)
+ p.SetCodeAndFile(code, "", "")
+ output, err := p.Parse(context.Background())
+ assert.NoError(t, err)
+ assert.Equal(t, u.result, output.Comments)
+ })
+ }
+}
+
+func TestParseMain(t *testing.T) {
+ t.Parallel()
+ units := []struct {
+ name string
+ code string
+ result bool
+ }{
+ {
+ name: "not has main",
+ code: "a = 1\nb = 2",
+ result: false,
+ },
+ {
+ name: "has main in function",
+ code: `def foo():
+ if __name__ == "__main__":
+ a = 3
+`,
+ result: false,
+ },
+ {
+ name: "has main",
+ code: `
+import unittest
+
+from lib import main
+
+
+class ExampleTest(unittest.TestCase):
+ def test_main(self):
+ self.assertEqual(
+ "",
+ main([["A", 1], ["B", 2]]),
+ )
+
+
+if __name__ == "__main__":
+ unittest.main()
+`,
+ result: true,
+ },
+ }
+ for _, u := range units {
+ t.Run(u.name, func(t *testing.T) {
+ p := NewFileParser()
+ code := []byte(u.code)
+ p.SetCodeAndFile(code, "", "")
+ output, err := p.Parse(context.Background())
+ assert.NoError(t, err)
+ assert.Equal(t, u.result, output.HasMain)
+ })
+ }
+}
+
+func TestParseFull(t *testing.T) {
+ p := NewFileParser()
+ code := []byte(`from bar import abc`)
+ p.SetCodeAndFile(code, "foo", "a.py")
+ output, err := p.Parse(context.Background())
+ assert.NoError(t, err)
+ assert.Equal(t, ParserOutput{
+ Modules: []module{{Name: "bar.abc", LineNumber: 1, Filepath: "foo/a.py", From: "bar"}},
+ Comments: nil,
+ HasMain: false,
+ FileName: "a.py",
+ }, *output)
+}
diff --git a/gazelle/python/language.go b/gazelle/python/language.go
index 568ac92..56eb97b 100644
--- a/gazelle/python/language.go
+++ b/gazelle/python/language.go
@@ -23,7 +23,6 @@
type Python struct {
Configurer
Resolver
- LifeCycleManager
}
// NewLanguage initializes a new Python that satisfies the language.Language
diff --git a/gazelle/python/lifecycle.go b/gazelle/python/lifecycle.go
deleted file mode 100644
index 6d628e9..0000000
--- a/gazelle/python/lifecycle.go
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2023 The Bazel Authors. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package python
-
-import (
- "context"
- _ "embed"
- "github.com/bazelbuild/bazel-gazelle/language"
- "log"
- "os"
-)
-
-var (
- //go:embed helper.zip
- helperZip []byte
- helperPath string
-)
-
-type LifeCycleManager struct {
- language.BaseLifecycleManager
- pyzFilePath string
-}
-
-func (l *LifeCycleManager) Before(ctx context.Context) {
- helperPath = os.Getenv("GAZELLE_PYTHON_HELPER")
- if helperPath == "" {
- pyzFile, err := os.CreateTemp("", "python_zip_")
- if err != nil {
- log.Fatalf("failed to write parser zip: %v", err)
- }
- defer pyzFile.Close()
- helperPath = pyzFile.Name()
- l.pyzFilePath = helperPath
- if _, err := pyzFile.Write(helperZip); err != nil {
- log.Fatalf("cannot write %q: %v", helperPath, err)
- }
- }
- startParserProcess(ctx)
- startStdModuleProcess(ctx)
-}
-
-func (l *LifeCycleManager) DoneGeneratingRules() {
- shutdownParserProcess()
-}
-
-func (l *LifeCycleManager) AfterResolvingDeps(ctx context.Context) {
- shutdownStdModuleProcess()
- if l.pyzFilePath != "" {
- os.Remove(l.pyzFilePath)
- }
-}
diff --git a/gazelle/python/parse.py b/gazelle/python/parse.py
deleted file mode 100644
index ea331bc..0000000
--- a/gazelle/python/parse.py
+++ /dev/null
@@ -1,147 +0,0 @@
-# Copyright 2023 The Bazel Authors. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# parse.py is a long-living program that communicates over STDIN and STDOUT.
-# STDIN receives parse requests, one per line. It outputs the parsed modules and
-# comments from all the files from each request.
-
-import ast
-import concurrent.futures
-import json
-import os
-import platform
-import sys
-from io import BytesIO
-from tokenize import COMMENT, NAME, OP, STRING, tokenize
-
-
-def parse_import_statements(content, filepath):
- modules = list()
- tree = ast.parse(content, filename=filepath)
- for node in ast.walk(tree):
- if isinstance(node, ast.Import):
- for subnode in node.names:
- module = {
- "name": subnode.name,
- "lineno": node.lineno,
- "filepath": filepath,
- "from": "",
- }
- modules.append(module)
- elif isinstance(node, ast.ImportFrom) and node.level == 0:
- for subnode in node.names:
- module = {
- "name": f"{node.module}.{subnode.name}",
- "lineno": node.lineno,
- "filepath": filepath,
- "from": node.module,
- }
- modules.append(module)
- return modules
-
-
-def parse_comments(content):
- comments = list()
- g = tokenize(BytesIO(content.encode("utf-8")).readline)
- for toknum, tokval, _, _, _ in g:
- if toknum == COMMENT:
- comments.append(tokval)
- return comments
-
-
-def parse_main(content):
- g = tokenize(BytesIO(content.encode("utf-8")).readline)
- for token_type, token_val, start, _, _ in g:
- if token_type != NAME or token_val != "if" or start[1] != 0:
- continue
- try:
- token_type, token_val, start, _, _ = next(g)
- if token_type != NAME or token_val != "__name__":
- continue
- token_type, token_val, start, _, _ = next(g)
- if token_type != OP or token_val != "==":
- continue
- token_type, token_val, start, _, _ = next(g)
- if token_type != STRING or token_val.strip("\"'") != "__main__":
- continue
- token_type, token_val, start, _, _ = next(g)
- if token_type != OP or token_val != ":":
- continue
- return True
- except StopIteration:
- break
- return False
-
-
-def parse(repo_root, rel_package_path, filename):
- rel_filepath = os.path.join(rel_package_path, filename)
- abs_filepath = os.path.join(repo_root, rel_filepath)
- with open(abs_filepath, "r") as file:
- content = file.read()
- # From simple benchmarks, 2 workers gave the best performance here.
- with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
- modules_future = executor.submit(
- parse_import_statements, content, rel_filepath
- )
- comments_future = executor.submit(parse_comments, content)
- main_future = executor.submit(parse_main, content)
- modules = modules_future.result()
- comments = comments_future.result()
- has_main = main_future.result()
-
- output = {
- "filename": filename,
- "modules": modules,
- "comments": comments,
- "has_main": has_main,
- }
- return output
-
-
-def create_main_executor():
- # We cannot use ProcessPoolExecutor on macOS, because the fork start method should be considered unsafe as it can
- # lead to crashes of the subprocess as macOS system libraries may start threads. Meanwhile, the 'spawn' and
- # 'forkserver' start methods generally cannot be used with “frozen” executables (i.e., Python zip file) on POSIX
- # systems. Therefore, there is no good way to use ProcessPoolExecutor on macOS when we distribute this program with
- # a zip file.
- # Ref: https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
- if platform.system() == "Darwin":
- return concurrent.futures.ThreadPoolExecutor()
- return concurrent.futures.ProcessPoolExecutor()
-
-def main(stdin, stdout):
- with create_main_executor() as executor:
- for parse_request in stdin:
- parse_request = json.loads(parse_request)
- repo_root = parse_request["repo_root"]
- rel_package_path = parse_request["rel_package_path"]
- filenames = parse_request["filenames"]
- outputs = list()
- if len(filenames) == 1:
- outputs.append(parse(repo_root, rel_package_path, filenames[0]))
- else:
- futures = [
- executor.submit(parse, repo_root, rel_package_path, filename)
- for filename in filenames
- if filename != ""
- ]
- for future in concurrent.futures.as_completed(futures):
- outputs.append(future.result())
- print(json.dumps(outputs), end="", file=stdout, flush=True)
- stdout.buffer.write(bytes([0]))
- stdout.flush()
-
-
-if __name__ == "__main__":
- exit(main(sys.stdin, sys.stdout))
diff --git a/gazelle/python/parse_test.py b/gazelle/python/parse_test.py
deleted file mode 100644
index 6d1fa49..0000000
--- a/gazelle/python/parse_test.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import unittest
-
-import parse
-
-
-class TestParse(unittest.TestCase):
- def test_not_has_main(self):
- content = "a = 1\nb = 2"
- self.assertFalse(parse.parse_main(content))
-
- def test_has_main_in_function(self):
- content = """
-def foo():
- if __name__ == "__main__":
- a = 3
-"""
- self.assertFalse(parse.parse_main(content))
-
- def test_has_main(self):
- content = """
-import unittest
-
-from lib import main
-
-
-class ExampleTest(unittest.TestCase):
- def test_main(self):
- self.assertEqual(
- "",
- main([["A", 1], ["B", 2]]),
- )
-
-
-if __name__ == "__main__":
- unittest.main()
-"""
- self.assertTrue(parse.parse_main(content))
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/gazelle/python/parser.go b/gazelle/python/parser.go
index 184fad7..1b2a90d 100644
--- a/gazelle/python/parser.go
+++ b/gazelle/python/parser.go
@@ -15,65 +15,16 @@
package python
import (
- "bufio"
"context"
_ "embed"
- "encoding/json"
"fmt"
- "io"
- "log"
- "os"
- "os/exec"
"strings"
- "sync"
"github.com/emirpasic/gods/sets/treeset"
godsutils "github.com/emirpasic/gods/utils"
+ "golang.org/x/sync/errgroup"
)
-var (
- parserCmd *exec.Cmd
- parserStdin io.WriteCloser
- parserStdout io.Reader
- parserMutex sync.Mutex
-)
-
-func startParserProcess(ctx context.Context) {
- // due to #691, we need a system interpreter to boostrap, part of which is
- // to locate the hermetic interpreter.
- parserCmd = exec.CommandContext(ctx, "python3", helperPath, "parse")
- parserCmd.Stderr = os.Stderr
-
- stdin, err := parserCmd.StdinPipe()
- if err != nil {
- log.Printf("failed to initialize parser: %v\n", err)
- os.Exit(1)
- }
- parserStdin = stdin
-
- stdout, err := parserCmd.StdoutPipe()
- if err != nil {
- log.Printf("failed to initialize parser: %v\n", err)
- os.Exit(1)
- }
- parserStdout = stdout
-
- if err := parserCmd.Start(); err != nil {
- log.Printf("failed to initialize parser: %v\n", err)
- os.Exit(1)
- }
-}
-
-func shutdownParserProcess() {
- if err := parserStdin.Close(); err != nil {
- fmt.Fprintf(os.Stderr, "error closing parser: %v", err)
- }
-
- if err := parserCmd.Wait(); err != nil {
- log.Printf("failed to wait for parser: %v\n", err)
- }
-}
-
// python3Parser implements a parser for Python files that extracts the modules
// as seen in the import statements.
type python3Parser struct {
@@ -110,36 +61,36 @@
// parse parses multiple Python files and returns the extracted modules from
// the import statements as well as the parsed comments.
func (p *python3Parser) parse(pyFilenames *treeset.Set) (*treeset.Set, map[string]*treeset.Set, *annotations, error) {
- parserMutex.Lock()
- defer parserMutex.Unlock()
-
modules := treeset.NewWith(moduleComparator)
- req := map[string]interface{}{
- "repo_root": p.repoRoot,
- "rel_package_path": p.relPackagePath,
- "filenames": pyFilenames.Values(),
+ g, ctx := errgroup.WithContext(context.Background())
+ ch := make(chan struct{}, 6) // Limit the number of concurrent parses.
+ chRes := make(chan *ParserOutput, len(pyFilenames.Values()))
+ for _, v := range pyFilenames.Values() {
+ ch <- struct{}{}
+ g.Go(func(filename string) func() error {
+ return func() error {
+ defer func() {
+ <-ch
+ }()
+ res, err := NewFileParser().ParseFile(ctx, p.repoRoot, p.relPackagePath, filename)
+ if err != nil {
+ return err
+ }
+ chRes <- res
+ return nil
+ }
+ }(v.(string)))
}
- encoder := json.NewEncoder(parserStdin)
- if err := encoder.Encode(&req); err != nil {
- return nil, nil, nil, fmt.Errorf("failed to parse: %w", err)
+ if err := g.Wait(); err != nil {
+ return nil, nil, nil, err
}
-
- reader := bufio.NewReader(parserStdout)
- data, err := reader.ReadBytes(0)
- if err != nil {
- return nil, nil, nil, fmt.Errorf("failed to parse: %w", err)
- }
- data = data[:len(data)-1]
- var allRes []parserResponse
- if err := json.Unmarshal(data, &allRes); err != nil {
- return nil, nil, nil, fmt.Errorf("failed to parse: %w", err)
- }
-
- mainModules := make(map[string]*treeset.Set, len(allRes))
+ close(ch)
+ close(chRes)
+ mainModules := make(map[string]*treeset.Set, len(chRes))
allAnnotations := new(annotations)
allAnnotations.ignore = make(map[string]struct{})
- for _, res := range allRes {
+ for res := range chRes {
if res.HasMain {
mainModules[res.FileName] = treeset.NewWith(moduleComparator)
}
@@ -194,21 +145,6 @@
return dedupe
}
-// parserResponse represents a response returned by the parser.py for a given
-// parsed Python module.
-type parserResponse struct {
- // FileName of the parsed module
- FileName string
- // The modules depended by the parsed module.
- Modules []module `json:"modules"`
- // The comments contained in the parsed module. This contains the
- // annotations as they are comments in the Python module.
- Comments []comment `json:"comments"`
- // HasMain indicates whether the Python module has `if __name == "__main__"`
- // at the top level
- HasMain bool `json:"has_main"`
-}
-
// module represents a fully-qualified, dot-separated, Python module as seen on
// the import statement, alongside the line number where it happened.
type module struct {
diff --git a/gazelle/python/private/BUILD.bazel b/gazelle/python/private/BUILD.bazel
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/gazelle/python/private/BUILD.bazel
diff --git a/gazelle/python/private/extensions.bzl b/gazelle/python/private/extensions.bzl
new file mode 100644
index 0000000..5de0713
--- /dev/null
+++ b/gazelle/python/private/extensions.bzl
@@ -0,0 +1,9 @@
+"python_stdlib_list module extension for use with bzlmod"
+
+load("@bazel_skylib//lib:modules.bzl", "modules")
+load("//:deps.bzl", "python_stdlib_list_deps")
+
+# Expose the `python_stdlib_list_deps` macro loaded from //:deps.bzl as a
+# module extension by wrapping it with skylib's `modules.as_extension`.
+python_stdlib_list = modules.as_extension(
+ python_stdlib_list_deps,
+ doc = "This extension registers python stdlib list dependencies.",
+)
diff --git a/gazelle/python/python_test.go b/gazelle/python/python_test.go
index 617b3f8..dd8c241 100644
--- a/gazelle/python/python_test.go
+++ b/gazelle/python/python_test.go
@@ -31,7 +31,6 @@
"time"
"github.com/bazelbuild/bazel-gazelle/testtools"
- "github.com/bazelbuild/rules_go/go/runfiles"
"github.com/bazelbuild/rules_go/go/tools/bazel"
"github.com/ghodss/yaml"
)
@@ -42,9 +41,8 @@
gazelleBinaryName = "gazelle_binary"
)
-var gazellePath = mustFindGazelle()
-
func TestGazelleBinary(t *testing.T) {
+ gazellePath := mustFindGazelle()
tests := map[string][]bazel.RunfileEntry{}
runfiles, err := bazel.ListRunfiles()
@@ -67,13 +65,12 @@
if len(tests) == 0 {
t.Fatal("no tests found")
}
-
for testName, files := range tests {
- testPath(t, testName, files)
+ testPath(t, gazellePath, testName, files)
}
}
-func testPath(t *testing.T, name string, files []bazel.RunfileEntry) {
+func testPath(t *testing.T, gazellePath, name string, files []bazel.RunfileEntry) {
t.Run(name, func(t *testing.T) {
t.Parallel()
var inputs, goldens []testtools.FileSpec
@@ -160,11 +157,6 @@
cmd.Stdout = &stdout
cmd.Stderr = &stderr
cmd.Dir = workspaceRoot
- helperScript, err := runfiles.Rlocation("rules_python_gazelle_plugin/python/helper")
- if err != nil {
- t.Fatalf("failed to initialize Python helper: %v", err)
- }
- cmd.Env = append(os.Environ(), "GAZELLE_PYTHON_HELPER="+helperScript)
if err := cmd.Run(); err != nil {
var e *exec.ExitError
if !errors.As(err, &e) {
diff --git a/gazelle/python/resolve.go b/gazelle/python/resolve.go
index f019a64..ca306c3 100644
--- a/gazelle/python/resolve.go
+++ b/gazelle/python/resolve.go
@@ -202,11 +202,7 @@
matches := ix.FindRulesByImportWithConfig(c, imp, languageName)
if len(matches) == 0 {
// Check if the imported module is part of the standard library.
- if isStd, err := isStdModule(module{Name: moduleName}); err != nil {
- log.Println("Error checking if standard module: ", err)
- hasFatalError = true
- continue POSSIBLE_MODULE_LOOP
- } else if isStd {
+ if isStdModule(module{Name: moduleName}) {
continue MODULES_LOOP
} else if cfg.ValidateImportStatements() {
err := fmt.Errorf(
diff --git a/gazelle/python/std_modules.go b/gazelle/python/std_modules.go
index 8a016af..e10f87b 100644
--- a/gazelle/python/std_modules.go
+++ b/gazelle/python/std_modules.go
@@ -16,92 +16,25 @@
import (
"bufio"
- "context"
_ "embed"
- "fmt"
- "io"
- "log"
- "os"
- "os/exec"
- "strconv"
"strings"
- "sync"
)
var (
- stdModulesCmd *exec.Cmd
- stdModulesStdin io.WriteCloser
- stdModulesStdout io.Reader
- stdModulesMutex sync.Mutex
- stdModulesSeen map[string]struct{}
+ //go:embed stdlib_list.txt
+ stdlibList string
+ stdModules map[string]struct{}
)
-func startStdModuleProcess(ctx context.Context) {
- stdModulesSeen = make(map[string]struct{})
-
- // due to #691, we need a system interpreter to boostrap, part of which is
- // to locate the hermetic interpreter.
- stdModulesCmd = exec.CommandContext(ctx, "python3", helperPath, "std_modules")
- stdModulesCmd.Stderr = os.Stderr
- // All userland site-packages should be ignored.
- stdModulesCmd.Env = []string{"PYTHONNOUSERSITE=1"}
-
- stdin, err := stdModulesCmd.StdinPipe()
- if err != nil {
- log.Printf("failed to initialize std_modules: %v\n", err)
- os.Exit(1)
- }
- stdModulesStdin = stdin
-
- stdout, err := stdModulesCmd.StdoutPipe()
- if err != nil {
- log.Printf("failed to initialize std_modules: %v\n", err)
- os.Exit(1)
- }
- stdModulesStdout = stdout
-
- if err := stdModulesCmd.Start(); err != nil {
- log.Printf("failed to initialize std_modules: %v\n", err)
- os.Exit(1)
+// init fills stdModules from the embedded stdlib_list.txt, treating every line
+// of the file as one module name.
+//
+// Surrounding whitespace is trimmed and blank lines are skipped, so a stray
+// empty or padded line in the list never registers "" (or a padded name) as a
+// standard library module.
+func init() {
+	stdModules = make(map[string]struct{})
+	scanner := bufio.NewScanner(strings.NewReader(stdlibList))
+	for scanner.Scan() {
+		name := strings.TrimSpace(scanner.Text())
+		if name == "" {
+			continue
+		}
+		stdModules[name] = struct{}{}
 }
}
-func shutdownStdModuleProcess() {
- if err := stdModulesStdin.Close(); err != nil {
- fmt.Fprintf(os.Stderr, "error closing std module: %v", err)
- }
-
- if err := stdModulesCmd.Wait(); err != nil {
- log.Printf("failed to wait for std_modules: %v\n", err)
- }
-}
-
-func isStdModule(m module) (bool, error) {
- if _, seen := stdModulesSeen[m.Name]; seen {
- return true, nil
- }
- stdModulesMutex.Lock()
- defer stdModulesMutex.Unlock()
-
- fmt.Fprintf(stdModulesStdin, "%s\n", m.Name)
-
- stdoutReader := bufio.NewReader(stdModulesStdout)
- line, err := stdoutReader.ReadString('\n')
- if err != nil {
- return false, err
- }
- if len(line) == 0 {
- return false, fmt.Errorf("unexpected empty output from std_modules")
- }
-
- isStd, err := strconv.ParseBool(strings.TrimSpace(line))
- if err != nil {
- return false, err
- }
-
- if isStd {
- stdModulesSeen[m.Name] = struct{}{}
- return true, nil
- }
- return false, nil
+// isStdModule reports whether m.Name is a key of stdModules, the set of names
+// loaded from the embedded stdlib_list.txt. The lookup is an exact match on
+// the full module name.
+func isStdModule(m module) bool {
+	_, found := stdModules[m.Name]
+	return found
}
diff --git a/gazelle/python/std_modules.py b/gazelle/python/std_modules.py
deleted file mode 100644
index 779a325..0000000
--- a/gazelle/python/std_modules.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright 2023 The Bazel Authors. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# std_modules.py is a long-living program that communicates over STDIN and
-# STDOUT. STDIN receives module names, one per line. For each module statement
-# it evaluates, it outputs true/false for whether the module is part of the
-# standard library or not.
-
-import os
-import sys
-from contextlib import redirect_stdout
-
-
-def is_std_modules(module):
- # If for some reason a module (such as pygame, see https://github.com/pygame/pygame/issues/542)
- # prints to stdout upon import,
- # the output of this script should still be parseable by golang.
- # Therefore, redirect stdout while running the import.
- with redirect_stdout(os.devnull):
- try:
- __import__(module, globals(), locals(), [], 0)
- return True
- except Exception:
- return False
-
-
-def main(stdin, stdout):
- for module in stdin:
- module = module.strip()
- # Don't print the boolean directly as it is capitalized in Python.
- print(
- "true" if is_std_modules(module) else "false",
- end="\n",
- file=stdout,
- )
- stdout.flush()
-
-
-if __name__ == "__main__":
- exit(main(sys.stdin, sys.stdout))
diff --git a/gazelle/python/std_modules_test.go b/gazelle/python/std_modules_test.go
new file mode 100644
index 0000000..bc22638
--- /dev/null
+++ b/gazelle/python/std_modules_test.go
@@ -0,0 +1,27 @@
+// Copyright 2023 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package python
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+// TestIsStdModule verifies that isStdModule accepts the names "unittest" and
+// "os.path" and rejects the name "foo".
+func TestIsStdModule(t *testing.T) {
+	expectations := []struct {
+		name  string
+		isStd bool
+	}{
+		{name: "unittest", isStd: true},
+		{name: "os.path", isStd: true},
+		{name: "foo", isStd: false},
+	}
+	for _, e := range expectations {
+		assert.Equal(t, e.isStd, isStdModule(module{Name: e.name}))
+	}
+}