feat: Creating one py_binary per main module (#1584)
Many existing Python repos don't use `__main__.py` to indicate the the
main module. Instead, they put something like below in any Python files:
```python
if __name__ == "__main__":
main()
```
This PR makes the Gazelle extension able to recognize main modules like
this, when `__main__.py` doesn't exist. This reduces the need to create
`__main__.py` when enabling Gazelle extensions in existing Python repos.
The current behavior of creating single `py_binary` for `__main__.py` is
preserved and takes precedence. So this is a backward-compatible change.
Closes #1566.
diff --git a/examples/bzlmod/BUILD.bazel b/examples/bzlmod/BUILD.bazel
index 6a4fdb8..bb16f98 100644
--- a/examples/bzlmod/BUILD.bazel
+++ b/examples/bzlmod/BUILD.bazel
@@ -25,6 +25,7 @@
# with pip-compile.
compile_pip_requirements_3_10(
name = "requirements_3_10",
+ timeout = "moderate",
src = "requirements.in",
requirements_txt = "requirements_lock_3_10.txt",
requirements_windows = "requirements_windows_3_10.txt",
diff --git a/gazelle/python/BUILD.bazel b/gazelle/python/BUILD.bazel
index 1d9460c..fd051eb 100644
--- a/gazelle/python/BUILD.bazel
+++ b/gazelle/python/BUILD.bazel
@@ -1,6 +1,6 @@
load("@bazel_gazelle//:def.bzl", "gazelle_binary")
load("@io_bazel_rules_go//go:def.bzl", "go_library")
-load("@rules_python//python:defs.bzl", "py_binary")
+load("@rules_python//python:defs.bzl", "py_binary", "py_test")
load(":gazelle_test.bzl", "gazelle_test")
go_library(
@@ -58,6 +58,15 @@
visibility = ["//visibility:public"],
)
+py_test(
+ name = "parse_test",
+ srcs = [
+ "parse.py",
+ "parse_test.py",
+ ],
+ imports = ["."],
+)
+
filegroup(
name = "helper.zip",
srcs = [":helper"],
diff --git a/gazelle/python/__main__.py b/gazelle/python/__main__.py
index 18bc1ca..9974c66 100644
--- a/gazelle/python/__main__.py
+++ b/gazelle/python/__main__.py
@@ -23,7 +23,7 @@
if __name__ == "__main__":
if len(sys.argv) < 2:
- sys.exit("Please provide subcommand, either print or std_modules")
+ sys.exit("Please provide subcommand, either parse or std_modules")
if sys.argv[1] == "parse":
sys.exit(parse.main(sys.stdin, sys.stdout))
elif sys.argv[1] == "std_modules":
diff --git a/gazelle/python/generate.go b/gazelle/python/generate.go
index 25fb194..f812f17 100644
--- a/gazelle/python/generate.go
+++ b/gazelle/python/generate.go
@@ -20,6 +20,7 @@
"log"
"os"
"path/filepath"
+ "sort"
"strings"
"github.com/bazelbuild/bazel-gazelle/config"
@@ -89,9 +90,9 @@
pyTestFilenames := treeset.NewWith(godsutils.StringComparator)
pyFileNames := treeset.NewWith(godsutils.StringComparator)
- // hasPyBinary controls whether a py_binary target should be generated for
+ // hasPyBinaryEntryPointFile controls whether a single py_binary target should be generated for
// this package or not.
- hasPyBinary := false
+ hasPyBinaryEntryPointFile := false
// hasPyTestEntryPointFile and hasPyTestEntryPointTarget control whether a py_test target should
// be generated for this package or not.
@@ -106,8 +107,8 @@
ext := filepath.Ext(f)
if ext == ".py" {
pyFileNames.Add(f)
- if !hasPyBinary && f == pyBinaryEntrypointFilename {
- hasPyBinary = true
+ if !hasPyBinaryEntryPointFile && f == pyBinaryEntrypointFilename {
+ hasPyBinaryEntryPointFile = true
} else if !hasPyTestEntryPointFile && f == pyTestEntrypointFilename {
hasPyTestEntryPointFile = true
} else if f == conftestFilename {
@@ -219,7 +220,7 @@
collisionErrors := singlylinkedlist.New()
appendPyLibrary := func(srcs *treeset.Set, pyLibraryTargetName string) {
- deps, err := parser.parse(srcs)
+ deps, mainModules, err := parser.parse(srcs)
if err != nil {
log.Fatalf("ERROR: %v\n", err)
}
@@ -228,16 +229,33 @@
// exists, and if it is of a different kind from the one we are
// generating. If so, we have to throw an error since Gazelle won't
// generate it correctly.
- if args.File != nil {
- for _, t := range args.File.Rules {
- if t.Name() == pyLibraryTargetName && t.Kind() != actualPyLibraryKind {
- fqTarget := label.New("", args.Rel, pyLibraryTargetName)
- err := fmt.Errorf("failed to generate target %q of kind %q: "+
- "a target of kind %q with the same name already exists. "+
- "Use the '# gazelle:%s' directive to change the naming convention.",
- fqTarget.String(), actualPyLibraryKind, t.Kind(), pythonconfig.LibraryNamingConvention)
- collisionErrors.Add(err)
+ if err := ensureNoCollision(args.File, pyLibraryTargetName, actualPyLibraryKind); err != nil {
+ fqTarget := label.New("", args.Rel, pyLibraryTargetName)
+ err := fmt.Errorf("failed to generate target %q of kind %q: %w. "+
+ "Use the '# gazelle:%s' directive to change the naming convention.",
+ fqTarget.String(), actualPyLibraryKind, err, pythonconfig.LibraryNamingConvention)
+ collisionErrors.Add(err)
+ }
+
+ if !hasPyBinaryEntryPointFile {
+ sort.Strings(mainModules)
+ // Creating one py_binary target per main module when __main__.py doesn't exist.
+ for _, filename := range mainModules {
+ pyBinaryTargetName := strings.TrimSuffix(filepath.Base(filename), ".py")
+ if err := ensureNoCollision(args.File, pyBinaryTargetName, actualPyBinaryKind); err != nil {
+ fqTarget := label.New("", args.Rel, pyBinaryTargetName)
+ log.Printf("failed to generate target %q of kind %q: %v",
+ fqTarget.String(), actualPyBinaryKind, err)
+ continue
}
+ srcs.Remove(filename)
+ pyBinary := newTargetBuilder(pyBinaryKind, pyBinaryTargetName, pythonProjectRoot, args.Rel, pyFileNames).
+ addVisibility(visibility).
+ addSrc(filename).
+ addModuleDependencies(deps).
+ generateImportsAttribute().build()
+ result.Gen = append(result.Gen, pyBinary)
+ result.Imports = append(result.Imports, pyBinary.PrivateAttr(config.GazelleImportsKey))
}
}
@@ -270,8 +288,8 @@
appendPyLibrary(pyLibraryFilenames, cfg.RenderLibraryName(packageName))
}
- if hasPyBinary {
- deps, err := parser.parseSingle(pyBinaryEntrypointFilename)
+ if hasPyBinaryEntryPointFile {
+ deps, _, err := parser.parseSingle(pyBinaryEntrypointFilename)
if err != nil {
log.Fatalf("ERROR: %v\n", err)
}
@@ -282,17 +300,12 @@
// exists, and if it is of a different kind from the one we are
// generating. If so, we have to throw an error since Gazelle won't
// generate it correctly.
- if args.File != nil {
- for _, t := range args.File.Rules {
- if t.Name() == pyBinaryTargetName && t.Kind() != actualPyBinaryKind {
- fqTarget := label.New("", args.Rel, pyBinaryTargetName)
- err := fmt.Errorf("failed to generate target %q of kind %q: "+
- "a target of kind %q with the same name already exists. "+
- "Use the '# gazelle:%s' directive to change the naming convention.",
- fqTarget.String(), actualPyBinaryKind, t.Kind(), pythonconfig.BinaryNamingConvention)
- collisionErrors.Add(err)
- }
- }
+ if err := ensureNoCollision(args.File, pyBinaryTargetName, actualPyBinaryKind); err != nil {
+ fqTarget := label.New("", args.Rel, pyBinaryTargetName)
+ err := fmt.Errorf("failed to generate target %q of kind %q: %w. "+
+ "Use the '# gazelle:%s' directive to change the naming convention.",
+ fqTarget.String(), actualPyBinaryKind, err, pythonconfig.BinaryNamingConvention)
+ collisionErrors.Add(err)
}
pyBinaryTarget := newTargetBuilder(pyBinaryKind, pyBinaryTargetName, pythonProjectRoot, args.Rel, pyFileNames).
@@ -310,7 +323,7 @@
var conftest *rule.Rule
if hasConftestFile {
- deps, err := parser.parseSingle(conftestFilename)
+ deps, _, err := parser.parseSingle(conftestFilename)
if err != nil {
log.Fatalf("ERROR: %v\n", err)
}
@@ -319,16 +332,11 @@
// exists, and if it is of a different kind from the one we are
// generating. If so, we have to throw an error since Gazelle won't
// generate it correctly.
- if args.File != nil {
- for _, t := range args.File.Rules {
- if t.Name() == conftestTargetname && t.Kind() != actualPyLibraryKind {
- fqTarget := label.New("", args.Rel, conftestTargetname)
- err := fmt.Errorf("failed to generate target %q of kind %q: "+
- "a target of kind %q with the same name already exists.",
- fqTarget.String(), actualPyLibraryKind, t.Kind())
- collisionErrors.Add(err)
- }
- }
+ if err := ensureNoCollision(args.File, conftestTargetname, actualPyLibraryKind); err != nil {
+ fqTarget := label.New("", args.Rel, conftestTargetname)
+ err := fmt.Errorf("failed to generate target %q of kind %q: %w. ",
+ fqTarget.String(), actualPyLibraryKind, err)
+ collisionErrors.Add(err)
}
conftestTarget := newTargetBuilder(pyLibraryKind, conftestTargetname, pythonProjectRoot, args.Rel, pyFileNames).
@@ -346,7 +354,7 @@
var pyTestTargets []*targetBuilder
newPyTestTargetBuilder := func(srcs *treeset.Set, pyTestTargetName string) *targetBuilder {
- deps, err := parser.parse(srcs)
+ deps, _, err := parser.parse(srcs)
if err != nil {
log.Fatalf("ERROR: %v\n", err)
}
@@ -354,17 +362,12 @@
// exists, and if it is of a different kind from the one we are
// generating. If so, we have to throw an error since Gazelle won't
// generate it correctly.
- if args.File != nil {
- for _, t := range args.File.Rules {
- if t.Name() == pyTestTargetName && t.Kind() != actualPyTestKind {
- fqTarget := label.New("", args.Rel, pyTestTargetName)
- err := fmt.Errorf("failed to generate target %q of kind %q: "+
- "a target of kind %q with the same name already exists. "+
- "Use the '# gazelle:%s' directive to change the naming convention.",
- fqTarget.String(), actualPyTestKind, t.Kind(), pythonconfig.TestNamingConvention)
- collisionErrors.Add(err)
- }
- }
+ if err := ensureNoCollision(args.File, pyTestTargetName, actualPyTestKind); err != nil {
+ fqTarget := label.New("", args.Rel, pyTestTargetName)
+ err := fmt.Errorf("failed to generate target %q of kind %q: %w. "+
+ "Use the '# gazelle:%s' directive to change the naming convention.",
+ fqTarget.String(), actualPyTestKind, err, pythonconfig.TestNamingConvention)
+ collisionErrors.Add(err)
}
return newTargetBuilder(pyTestKind, pyTestTargetName, pythonProjectRoot, args.Rel, pyFileNames).
addSrcs(srcs).
@@ -476,3 +479,15 @@
return false
}
}
+
+func ensureNoCollision(file *rule.File, targetName, kind string) error {
+ if file == nil {
+ return nil
+ }
+ for _, t := range file.Rules {
+ if t.Name() == targetName && t.Kind() != kind {
+ return fmt.Errorf("a target of kind %q with the same name already exists", t.Kind())
+ }
+ }
+ return nil
+}
diff --git a/gazelle/python/parse.py b/gazelle/python/parse.py
index 6c0ef69..daa6d2b 100644
--- a/gazelle/python/parse.py
+++ b/gazelle/python/parse.py
@@ -22,7 +22,7 @@
import os
import sys
from io import BytesIO
-from tokenize import COMMENT, tokenize
+from tokenize import COMMENT, NAME, OP, STRING, tokenize
def parse_import_statements(content, filepath):
@@ -59,6 +59,30 @@
return comments
+def parse_main(content):
+ g = tokenize(BytesIO(content.encode("utf-8")).readline)
+ for token_type, token_val, start, _, _ in g:
+ if token_type != NAME or token_val != "if" or start[1] != 0:
+ continue
+ try:
+ token_type, token_val, start, _, _ = next(g)
+ if token_type != NAME or token_val != "__name__":
+ continue
+ token_type, token_val, start, _, _ = next(g)
+ if token_type != OP or token_val != "==":
+ continue
+ token_type, token_val, start, _, _ = next(g)
+ if token_type != STRING or token_val.strip("\"'") != '__main__':
+ continue
+ token_type, token_val, start, _, _ = next(g)
+ if token_type != OP or token_val != ":":
+ continue
+ return True
+ except StopIteration:
+ break
+ return False
+
+
def parse(repo_root, rel_package_path, filename):
rel_filepath = os.path.join(rel_package_path, filename)
abs_filepath = os.path.join(repo_root, rel_filepath)
@@ -70,11 +94,16 @@
parse_import_statements, content, rel_filepath
)
comments_future = executor.submit(parse_comments, content)
+ main_future = executor.submit(parse_main, content)
modules = modules_future.result()
comments = comments_future.result()
+ has_main = main_future.result()
+
output = {
+ "filename": filename,
"modules": modules,
"comments": comments,
+ "has_main": has_main,
}
return output
diff --git a/gazelle/python/parse_test.py b/gazelle/python/parse_test.py
new file mode 100644
index 0000000..3ebded4
--- /dev/null
+++ b/gazelle/python/parse_test.py
@@ -0,0 +1,39 @@
+import unittest
+import parse
+
+class TestParse(unittest.TestCase):
+ def test_not_has_main(self):
+ content = "a = 1\nb = 2"
+ self.assertFalse(parse.parse_main(content))
+
+ def test_has_main_in_function(self):
+ content = """
+def foo():
+ if __name__ == "__main__":
+ a = 3
+"""
+ self.assertFalse(parse.parse_main(content))
+
+ def test_has_main(self):
+ content = """
+import unittest
+
+from lib import main
+
+
+class ExampleTest(unittest.TestCase):
+ def test_main(self):
+ self.assertEqual(
+ "",
+ main([["A", 1], ["B", 2]]),
+ )
+
+
+if __name__ == "__main__":
+ unittest.main()
+"""
+ self.assertTrue(parse.parse_main(content))
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/gazelle/python/parser.go b/gazelle/python/parser.go
index 8931026..d22850b 100644
--- a/gazelle/python/parser.go
+++ b/gazelle/python/parser.go
@@ -101,7 +101,7 @@
// parseSingle parses a single Python file and returns the extracted modules
// from the import statements as well as the parsed comments.
-func (p *python3Parser) parseSingle(pyFilename string) (*treeset.Set, error) {
+func (p *python3Parser) parseSingle(pyFilename string) (*treeset.Set, []string, error) {
pyFilenames := treeset.NewWith(godsutils.StringComparator)
pyFilenames.Add(pyFilename)
return p.parse(pyFilenames)
@@ -109,7 +109,7 @@
// parse parses multiple Python files and returns the extracted modules from
// the import statements as well as the parsed comments.
-func (p *python3Parser) parse(pyFilenames *treeset.Set) (*treeset.Set, error) {
+func (p *python3Parser) parse(pyFilenames *treeset.Set) (*treeset.Set, []string, error) {
parserMutex.Lock()
defer parserMutex.Unlock()
@@ -122,24 +122,28 @@
}
encoder := json.NewEncoder(parserStdin)
if err := encoder.Encode(&req); err != nil {
- return nil, fmt.Errorf("failed to parse: %w", err)
+ return nil, nil, fmt.Errorf("failed to parse: %w", err)
}
reader := bufio.NewReader(parserStdout)
data, err := reader.ReadBytes(0)
if err != nil {
- return nil, fmt.Errorf("failed to parse: %w", err)
+ return nil, nil, fmt.Errorf("failed to parse: %w", err)
}
data = data[:len(data)-1]
var allRes []parserResponse
if err := json.Unmarshal(data, &allRes); err != nil {
- return nil, fmt.Errorf("failed to parse: %w", err)
+ return nil, nil, fmt.Errorf("failed to parse: %w", err)
}
+ var mainModules []string
for _, res := range allRes {
+ if res.HasMain {
+ mainModules = append(mainModules, res.FileName)
+ }
annotations, err := annotationsFromComments(res.Comments)
if err != nil {
- return nil, fmt.Errorf("failed to parse annotations: %w", err)
+ return nil, nil, fmt.Errorf("failed to parse annotations: %w", err)
}
for _, m := range res.Modules {
@@ -159,17 +163,22 @@
}
}
- return modules, nil
+ return modules, mainModules, nil
}
// parserResponse represents a response returned by the parser.py for a given
// parsed Python module.
type parserResponse struct {
+ // FileName of the parsed module
+ FileName string
// The modules depended by the parsed module.
Modules []module `json:"modules"`
// The comments contained in the parsed module. This contains the
// annotations as they are comments in the Python module.
Comments []comment `json:"comments"`
+ // HasMain indicates whether the Python module has `if __name == "__main__"`
+ // at the top level
+ HasMain bool `json:"has_main"`
}
// module represents a fully-qualified, dot-separated, Python module as seen on
diff --git a/gazelle/python/testdata/binary_without_entrypoint/BUILD.in b/gazelle/python/testdata/binary_without_entrypoint/BUILD.in
new file mode 100644
index 0000000..7aace67
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/BUILD.in
@@ -0,0 +1,6 @@
+# gazelle:python_library_naming_convention py_default_library
+
+filegroup(
+ name = "collided_main",
+ srcs = ["collided_main.py"],
+)
diff --git a/gazelle/python/testdata/binary_without_entrypoint/BUILD.out b/gazelle/python/testdata/binary_without_entrypoint/BUILD.out
new file mode 100644
index 0000000..c88f2ff
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/BUILD.out
@@ -0,0 +1,34 @@
+load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test")
+
+# gazelle:python_library_naming_convention py_default_library
+
+filegroup(
+ name = "collided_main",
+ srcs = ["collided_main.py"],
+)
+
+py_binary(
+ name = "main",
+ srcs = ["main.py"],
+ visibility = ["//:__subpackages__"],
+)
+
+py_binary(
+ name = "main2",
+ srcs = ["main2.py"],
+ visibility = ["//:__subpackages__"],
+)
+
+py_library(
+ name = "py_default_library",
+ srcs = [
+ "__init__.py",
+ "collided_main.py",
+ ],
+ visibility = ["//:__subpackages__"],
+)
+
+py_test(
+ name = "main_test",
+ srcs = ["main_test.py"],
+)
\ No newline at end of file
diff --git a/gazelle/python/testdata/binary_without_entrypoint/README.md b/gazelle/python/testdata/binary_without_entrypoint/README.md
new file mode 100644
index 0000000..e91250d
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/README.md
@@ -0,0 +1,4 @@
+# Binary without entrypoint
+
+This test case asserts that when there is no __main__.py, a py_binary is generated per main module, unless a main
+module main collides with existing target name.
diff --git a/gazelle/python/testdata/binary_without_entrypoint/WORKSPACE b/gazelle/python/testdata/binary_without_entrypoint/WORKSPACE
new file mode 100644
index 0000000..faff6af
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/WORKSPACE
@@ -0,0 +1 @@
+# This is a Bazel workspace for the Gazelle test data.
diff --git a/gazelle/python/testdata/binary_without_entrypoint/__init__.py b/gazelle/python/testdata/binary_without_entrypoint/__init__.py
new file mode 100644
index 0000000..7307559
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# For test purposes only.
diff --git a/gazelle/python/testdata/binary_without_entrypoint/collided_main.py b/gazelle/python/testdata/binary_without_entrypoint/collided_main.py
new file mode 100644
index 0000000..3668fcc
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/collided_main.py
@@ -0,0 +1,2 @@
+if __name__ == "__main__":
+ run()
\ No newline at end of file
diff --git a/gazelle/python/testdata/binary_without_entrypoint/main.py b/gazelle/python/testdata/binary_without_entrypoint/main.py
new file mode 100644
index 0000000..3668fcc
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/main.py
@@ -0,0 +1,2 @@
+if __name__ == "__main__":
+ run()
\ No newline at end of file
diff --git a/gazelle/python/testdata/binary_without_entrypoint/main2.py b/gazelle/python/testdata/binary_without_entrypoint/main2.py
new file mode 100644
index 0000000..84e642a
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/main2.py
@@ -0,0 +1,2 @@
+if __name__ == "__main__":
+ run()
diff --git a/gazelle/python/testdata/binary_without_entrypoint/main_test.py b/gazelle/python/testdata/binary_without_entrypoint/main_test.py
new file mode 100644
index 0000000..505a766
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/main_test.py
@@ -0,0 +1,7 @@
+import unittest
+
+class TestMain(unittest.unittest):
+ pass
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/gazelle/python/testdata/binary_without_entrypoint/test.yaml b/gazelle/python/testdata/binary_without_entrypoint/test.yaml
new file mode 100644
index 0000000..44e4ae8
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/test.yaml
@@ -0,0 +1,18 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+expect:
+ stderr: |
+ gazelle: failed to generate target "//:collided_main" of kind "py_binary": a target of kind "filegroup" with the same name already exists