feat: Creating one py_binary per main module (#1584)

Many existing Python repos don't use `__main__.py` to indicate the the
main module. Instead, they put something like below in any Python files:

```python
if __name__ == "__main__":
  main()
```

This PR makes the Gazelle extension able to recognize main modules like
this, when `__main__.py` doesn't exist. This reduces the need to create
`__main__.py` when enabling Gazelle extensions in existing Python repos.

The current behavior of creating single `py_binary` for `__main__.py` is
preserved and takes precedence. So this is a backward-compatible change.

Closes #1566.
diff --git a/examples/bzlmod/BUILD.bazel b/examples/bzlmod/BUILD.bazel
index 6a4fdb8..bb16f98 100644
--- a/examples/bzlmod/BUILD.bazel
+++ b/examples/bzlmod/BUILD.bazel
@@ -25,6 +25,7 @@
 # with pip-compile.
 compile_pip_requirements_3_10(
     name = "requirements_3_10",
+    timeout = "moderate",
     src = "requirements.in",
     requirements_txt = "requirements_lock_3_10.txt",
     requirements_windows = "requirements_windows_3_10.txt",
diff --git a/gazelle/python/BUILD.bazel b/gazelle/python/BUILD.bazel
index 1d9460c..fd051eb 100644
--- a/gazelle/python/BUILD.bazel
+++ b/gazelle/python/BUILD.bazel
@@ -1,6 +1,6 @@
 load("@bazel_gazelle//:def.bzl", "gazelle_binary")
 load("@io_bazel_rules_go//go:def.bzl", "go_library")
-load("@rules_python//python:defs.bzl", "py_binary")
+load("@rules_python//python:defs.bzl", "py_binary", "py_test")
 load(":gazelle_test.bzl", "gazelle_test")
 
 go_library(
@@ -58,6 +58,15 @@
     visibility = ["//visibility:public"],
 )
 
+py_test(
+    name = "parse_test",
+    srcs = [
+        "parse.py",
+        "parse_test.py",
+    ],
+    imports = ["."],
+)
+
 filegroup(
     name = "helper.zip",
     srcs = [":helper"],
diff --git a/gazelle/python/__main__.py b/gazelle/python/__main__.py
index 18bc1ca..9974c66 100644
--- a/gazelle/python/__main__.py
+++ b/gazelle/python/__main__.py
@@ -23,7 +23,7 @@
 
 if __name__ == "__main__":
     if len(sys.argv) < 2:
-        sys.exit("Please provide subcommand, either print or std_modules")
+        sys.exit("Please provide subcommand, either parse or std_modules")
     if sys.argv[1] == "parse":
         sys.exit(parse.main(sys.stdin, sys.stdout))
     elif sys.argv[1] == "std_modules":
diff --git a/gazelle/python/generate.go b/gazelle/python/generate.go
index 25fb194..f812f17 100644
--- a/gazelle/python/generate.go
+++ b/gazelle/python/generate.go
@@ -20,6 +20,7 @@
 	"log"
 	"os"
 	"path/filepath"
+	"sort"
 	"strings"
 
 	"github.com/bazelbuild/bazel-gazelle/config"
@@ -89,9 +90,9 @@
 	pyTestFilenames := treeset.NewWith(godsutils.StringComparator)
 	pyFileNames := treeset.NewWith(godsutils.StringComparator)
 
-	// hasPyBinary controls whether a py_binary target should be generated for
+	// hasPyBinaryEntryPointFile controls whether a single py_binary target should be generated for
 	// this package or not.
-	hasPyBinary := false
+	hasPyBinaryEntryPointFile := false
 
 	// hasPyTestEntryPointFile and hasPyTestEntryPointTarget control whether a py_test target should
 	// be generated for this package or not.
@@ -106,8 +107,8 @@
 		ext := filepath.Ext(f)
 		if ext == ".py" {
 			pyFileNames.Add(f)
-			if !hasPyBinary && f == pyBinaryEntrypointFilename {
-				hasPyBinary = true
+			if !hasPyBinaryEntryPointFile && f == pyBinaryEntrypointFilename {
+				hasPyBinaryEntryPointFile = true
 			} else if !hasPyTestEntryPointFile && f == pyTestEntrypointFilename {
 				hasPyTestEntryPointFile = true
 			} else if f == conftestFilename {
@@ -219,7 +220,7 @@
 	collisionErrors := singlylinkedlist.New()
 
 	appendPyLibrary := func(srcs *treeset.Set, pyLibraryTargetName string) {
-		deps, err := parser.parse(srcs)
+		deps, mainModules, err := parser.parse(srcs)
 		if err != nil {
 			log.Fatalf("ERROR: %v\n", err)
 		}
@@ -228,16 +229,33 @@
 		// exists, and if it is of a different kind from the one we are
 		// generating. If so, we have to throw an error since Gazelle won't
 		// generate it correctly.
-		if args.File != nil {
-			for _, t := range args.File.Rules {
-				if t.Name() == pyLibraryTargetName && t.Kind() != actualPyLibraryKind {
-					fqTarget := label.New("", args.Rel, pyLibraryTargetName)
-					err := fmt.Errorf("failed to generate target %q of kind %q: "+
-						"a target of kind %q with the same name already exists. "+
-						"Use the '# gazelle:%s' directive to change the naming convention.",
-						fqTarget.String(), actualPyLibraryKind, t.Kind(), pythonconfig.LibraryNamingConvention)
-					collisionErrors.Add(err)
+		if err := ensureNoCollision(args.File, pyLibraryTargetName, actualPyLibraryKind); err != nil {
+			fqTarget := label.New("", args.Rel, pyLibraryTargetName)
+			err := fmt.Errorf("failed to generate target %q of kind %q: %w. "+
+				"Use the '# gazelle:%s' directive to change the naming convention.",
+				fqTarget.String(), actualPyLibraryKind, err, pythonconfig.LibraryNamingConvention)
+			collisionErrors.Add(err)
+		}
+
+		if !hasPyBinaryEntryPointFile {
+			sort.Strings(mainModules)
+			// Creating one py_binary target per main module when __main__.py doesn't exist.
+			for _, filename := range mainModules {
+				pyBinaryTargetName := strings.TrimSuffix(filepath.Base(filename), ".py")
+				if err := ensureNoCollision(args.File, pyBinaryTargetName, actualPyBinaryKind); err != nil {
+					fqTarget := label.New("", args.Rel, pyBinaryTargetName)
+					log.Printf("failed to generate target %q of kind %q: %v",
+						fqTarget.String(), actualPyBinaryKind, err)
+					continue
 				}
+				srcs.Remove(filename)
+				pyBinary := newTargetBuilder(pyBinaryKind, pyBinaryTargetName, pythonProjectRoot, args.Rel, pyFileNames).
+					addVisibility(visibility).
+					addSrc(filename).
+					addModuleDependencies(deps).
+					generateImportsAttribute().build()
+				result.Gen = append(result.Gen, pyBinary)
+				result.Imports = append(result.Imports, pyBinary.PrivateAttr(config.GazelleImportsKey))
 			}
 		}
 
@@ -270,8 +288,8 @@
 		appendPyLibrary(pyLibraryFilenames, cfg.RenderLibraryName(packageName))
 	}
 
-	if hasPyBinary {
-		deps, err := parser.parseSingle(pyBinaryEntrypointFilename)
+	if hasPyBinaryEntryPointFile {
+		deps, _, err := parser.parseSingle(pyBinaryEntrypointFilename)
 		if err != nil {
 			log.Fatalf("ERROR: %v\n", err)
 		}
@@ -282,17 +300,12 @@
 		// exists, and if it is of a different kind from the one we are
 		// generating. If so, we have to throw an error since Gazelle won't
 		// generate it correctly.
-		if args.File != nil {
-			for _, t := range args.File.Rules {
-				if t.Name() == pyBinaryTargetName && t.Kind() != actualPyBinaryKind {
-					fqTarget := label.New("", args.Rel, pyBinaryTargetName)
-					err := fmt.Errorf("failed to generate target %q of kind %q: "+
-						"a target of kind %q with the same name already exists. "+
-						"Use the '# gazelle:%s' directive to change the naming convention.",
-						fqTarget.String(), actualPyBinaryKind, t.Kind(), pythonconfig.BinaryNamingConvention)
-					collisionErrors.Add(err)
-				}
-			}
+		if err := ensureNoCollision(args.File, pyBinaryTargetName, actualPyBinaryKind); err != nil {
+			fqTarget := label.New("", args.Rel, pyBinaryTargetName)
+			err := fmt.Errorf("failed to generate target %q of kind %q: %w. "+
+				"Use the '# gazelle:%s' directive to change the naming convention.",
+				fqTarget.String(), actualPyBinaryKind, err, pythonconfig.BinaryNamingConvention)
+			collisionErrors.Add(err)
 		}
 
 		pyBinaryTarget := newTargetBuilder(pyBinaryKind, pyBinaryTargetName, pythonProjectRoot, args.Rel, pyFileNames).
@@ -310,7 +323,7 @@
 
 	var conftest *rule.Rule
 	if hasConftestFile {
-		deps, err := parser.parseSingle(conftestFilename)
+		deps, _, err := parser.parseSingle(conftestFilename)
 		if err != nil {
 			log.Fatalf("ERROR: %v\n", err)
 		}
@@ -319,16 +332,11 @@
 		// exists, and if it is of a different kind from the one we are
 		// generating. If so, we have to throw an error since Gazelle won't
 		// generate it correctly.
-		if args.File != nil {
-			for _, t := range args.File.Rules {
-				if t.Name() == conftestTargetname && t.Kind() != actualPyLibraryKind {
-					fqTarget := label.New("", args.Rel, conftestTargetname)
-					err := fmt.Errorf("failed to generate target %q of kind %q: "+
-						"a target of kind %q with the same name already exists.",
-						fqTarget.String(), actualPyLibraryKind, t.Kind())
-					collisionErrors.Add(err)
-				}
-			}
+		if err := ensureNoCollision(args.File, conftestTargetname, actualPyLibraryKind); err != nil {
+			fqTarget := label.New("", args.Rel, conftestTargetname)
+			err := fmt.Errorf("failed to generate target %q of kind %q: %w. ",
+				fqTarget.String(), actualPyLibraryKind, err)
+			collisionErrors.Add(err)
 		}
 
 		conftestTarget := newTargetBuilder(pyLibraryKind, conftestTargetname, pythonProjectRoot, args.Rel, pyFileNames).
@@ -346,7 +354,7 @@
 
 	var pyTestTargets []*targetBuilder
 	newPyTestTargetBuilder := func(srcs *treeset.Set, pyTestTargetName string) *targetBuilder {
-		deps, err := parser.parse(srcs)
+		deps, _, err := parser.parse(srcs)
 		if err != nil {
 			log.Fatalf("ERROR: %v\n", err)
 		}
@@ -354,17 +362,12 @@
 		// exists, and if it is of a different kind from the one we are
 		// generating. If so, we have to throw an error since Gazelle won't
 		// generate it correctly.
-		if args.File != nil {
-			for _, t := range args.File.Rules {
-				if t.Name() == pyTestTargetName && t.Kind() != actualPyTestKind {
-					fqTarget := label.New("", args.Rel, pyTestTargetName)
-					err := fmt.Errorf("failed to generate target %q of kind %q: "+
-						"a target of kind %q with the same name already exists. "+
-						"Use the '# gazelle:%s' directive to change the naming convention.",
-						fqTarget.String(), actualPyTestKind, t.Kind(), pythonconfig.TestNamingConvention)
-					collisionErrors.Add(err)
-				}
-			}
+		if err := ensureNoCollision(args.File, pyTestTargetName, actualPyTestKind); err != nil {
+			fqTarget := label.New("", args.Rel, pyTestTargetName)
+			err := fmt.Errorf("failed to generate target %q of kind %q: %w. "+
+				"Use the '# gazelle:%s' directive to change the naming convention.",
+				fqTarget.String(), actualPyTestKind, err, pythonconfig.TestNamingConvention)
+			collisionErrors.Add(err)
 		}
 		return newTargetBuilder(pyTestKind, pyTestTargetName, pythonProjectRoot, args.Rel, pyFileNames).
 			addSrcs(srcs).
@@ -476,3 +479,15 @@
 		return false
 	}
 }
+
+func ensureNoCollision(file *rule.File, targetName, kind string) error {
+	if file == nil {
+		return nil
+	}
+	for _, t := range file.Rules {
+		if t.Name() == targetName && t.Kind() != kind {
+			return fmt.Errorf("a target of kind %q with the same name already exists", t.Kind())
+		}
+	}
+	return nil
+}
diff --git a/gazelle/python/parse.py b/gazelle/python/parse.py
index 6c0ef69..daa6d2b 100644
--- a/gazelle/python/parse.py
+++ b/gazelle/python/parse.py
@@ -22,7 +22,7 @@
 import os
 import sys
 from io import BytesIO
-from tokenize import COMMENT, tokenize
+from tokenize import COMMENT, NAME, OP, STRING, tokenize
 
 
 def parse_import_statements(content, filepath):
@@ -59,6 +59,30 @@
     return comments
 
 
+def parse_main(content):
+    g = tokenize(BytesIO(content.encode("utf-8")).readline)
+    for token_type, token_val, start, _, _ in g:
+        if token_type != NAME or token_val != "if" or start[1] != 0:
+            continue
+        try:
+            token_type, token_val, start, _, _ = next(g)
+            if token_type != NAME or token_val != "__name__":
+                continue
+            token_type, token_val, start, _, _ = next(g)
+            if token_type != OP or token_val != "==":
+                continue
+            token_type, token_val, start, _, _ = next(g)
+            if token_type != STRING or token_val.strip("\"'") != '__main__':
+                continue
+            token_type, token_val, start, _, _ = next(g)
+            if token_type != OP or token_val != ":":
+                continue
+            return True
+        except StopIteration:
+            break
+    return False
+
+
 def parse(repo_root, rel_package_path, filename):
     rel_filepath = os.path.join(rel_package_path, filename)
     abs_filepath = os.path.join(repo_root, rel_filepath)
@@ -70,11 +94,16 @@
                 parse_import_statements, content, rel_filepath
             )
             comments_future = executor.submit(parse_comments, content)
+            main_future = executor.submit(parse_main, content)
         modules = modules_future.result()
         comments = comments_future.result()
+        has_main = main_future.result()
+
         output = {
+            "filename": filename,
             "modules": modules,
             "comments": comments,
+            "has_main": has_main,
         }
         return output
 
diff --git a/gazelle/python/parse_test.py b/gazelle/python/parse_test.py
new file mode 100644
index 0000000..3ebded4
--- /dev/null
+++ b/gazelle/python/parse_test.py
@@ -0,0 +1,39 @@
+import unittest
+import parse
+
+class TestParse(unittest.TestCase):
+    def test_not_has_main(self):
+        content = "a = 1\nb = 2"
+        self.assertFalse(parse.parse_main(content))
+
+    def test_has_main_in_function(self):
+        content = """
+def foo():
+    if __name__ == "__main__":
+        a = 3
+"""
+        self.assertFalse(parse.parse_main(content))
+
+    def test_has_main(self):
+        content = """
+import unittest
+
+from lib import main
+
+
+class ExampleTest(unittest.TestCase):
+    def test_main(self):
+        self.assertEqual(
+            "",
+            main([["A", 1], ["B", 2]]),
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
+"""
+        self.assertTrue(parse.parse_main(content))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/gazelle/python/parser.go b/gazelle/python/parser.go
index 8931026..d22850b 100644
--- a/gazelle/python/parser.go
+++ b/gazelle/python/parser.go
@@ -101,7 +101,7 @@
 
 // parseSingle parses a single Python file and returns the extracted modules
 // from the import statements as well as the parsed comments.
-func (p *python3Parser) parseSingle(pyFilename string) (*treeset.Set, error) {
+func (p *python3Parser) parseSingle(pyFilename string) (*treeset.Set, []string, error) {
 	pyFilenames := treeset.NewWith(godsutils.StringComparator)
 	pyFilenames.Add(pyFilename)
 	return p.parse(pyFilenames)
@@ -109,7 +109,7 @@
 
 // parse parses multiple Python files and returns the extracted modules from
 // the import statements as well as the parsed comments.
-func (p *python3Parser) parse(pyFilenames *treeset.Set) (*treeset.Set, error) {
+func (p *python3Parser) parse(pyFilenames *treeset.Set) (*treeset.Set, []string, error) {
 	parserMutex.Lock()
 	defer parserMutex.Unlock()
 
@@ -122,24 +122,28 @@
 	}
 	encoder := json.NewEncoder(parserStdin)
 	if err := encoder.Encode(&req); err != nil {
-		return nil, fmt.Errorf("failed to parse: %w", err)
+		return nil, nil, fmt.Errorf("failed to parse: %w", err)
 	}
 
 	reader := bufio.NewReader(parserStdout)
 	data, err := reader.ReadBytes(0)
 	if err != nil {
-		return nil, fmt.Errorf("failed to parse: %w", err)
+		return nil, nil, fmt.Errorf("failed to parse: %w", err)
 	}
 	data = data[:len(data)-1]
 	var allRes []parserResponse
 	if err := json.Unmarshal(data, &allRes); err != nil {
-		return nil, fmt.Errorf("failed to parse: %w", err)
+		return nil, nil, fmt.Errorf("failed to parse: %w", err)
 	}
 
+	var mainModules []string
 	for _, res := range allRes {
+		if res.HasMain {
+			mainModules = append(mainModules, res.FileName)
+		}
 		annotations, err := annotationsFromComments(res.Comments)
 		if err != nil {
-			return nil, fmt.Errorf("failed to parse annotations: %w", err)
+			return nil, nil, fmt.Errorf("failed to parse annotations: %w", err)
 		}
 
 		for _, m := range res.Modules {
@@ -159,17 +163,22 @@
 		}
 	}
 
-	return modules, nil
+	return modules, mainModules, nil
 }
 
 // parserResponse represents a response returned by the parser.py for a given
 // parsed Python module.
 type parserResponse struct {
+	// FileName of the parsed module
+	FileName string
 	// The modules depended by the parsed module.
 	Modules []module `json:"modules"`
 	// The comments contained in the parsed module. This contains the
 	// annotations as they are comments in the Python module.
 	Comments []comment `json:"comments"`
+	// HasMain indicates whether the Python module has `if __name == "__main__"`
+	// at the top level
+	HasMain bool `json:"has_main"`
 }
 
 // module represents a fully-qualified, dot-separated, Python module as seen on
diff --git a/gazelle/python/testdata/binary_without_entrypoint/BUILD.in b/gazelle/python/testdata/binary_without_entrypoint/BUILD.in
new file mode 100644
index 0000000..7aace67
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/BUILD.in
@@ -0,0 +1,6 @@
+# gazelle:python_library_naming_convention py_default_library
+
+filegroup(
+    name = "collided_main",
+    srcs = ["collided_main.py"],
+)
diff --git a/gazelle/python/testdata/binary_without_entrypoint/BUILD.out b/gazelle/python/testdata/binary_without_entrypoint/BUILD.out
new file mode 100644
index 0000000..c88f2ff
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/BUILD.out
@@ -0,0 +1,34 @@
+load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test")
+
+# gazelle:python_library_naming_convention py_default_library
+
+filegroup(
+    name = "collided_main",
+    srcs = ["collided_main.py"],
+)
+
+py_binary(
+    name = "main",
+    srcs = ["main.py"],
+    visibility = ["//:__subpackages__"],
+)
+
+py_binary(
+    name = "main2",
+    srcs = ["main2.py"],
+    visibility = ["//:__subpackages__"],
+)
+
+py_library(
+    name = "py_default_library",
+    srcs = [
+        "__init__.py",
+        "collided_main.py",
+    ],
+    visibility = ["//:__subpackages__"],
+)
+
+py_test(
+    name = "main_test",
+    srcs = ["main_test.py"],
+)
\ No newline at end of file
diff --git a/gazelle/python/testdata/binary_without_entrypoint/README.md b/gazelle/python/testdata/binary_without_entrypoint/README.md
new file mode 100644
index 0000000..e91250d
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/README.md
@@ -0,0 +1,4 @@
+# Binary without entrypoint
+
+This test case asserts that when there is no __main__.py, a py_binary is generated per main module, unless a main
+module main collides with existing target name.
diff --git a/gazelle/python/testdata/binary_without_entrypoint/WORKSPACE b/gazelle/python/testdata/binary_without_entrypoint/WORKSPACE
new file mode 100644
index 0000000..faff6af
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/WORKSPACE
@@ -0,0 +1 @@
+# This is a Bazel workspace for the Gazelle test data.
diff --git a/gazelle/python/testdata/binary_without_entrypoint/__init__.py b/gazelle/python/testdata/binary_without_entrypoint/__init__.py
new file mode 100644
index 0000000..7307559
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# For test purposes only.
diff --git a/gazelle/python/testdata/binary_without_entrypoint/collided_main.py b/gazelle/python/testdata/binary_without_entrypoint/collided_main.py
new file mode 100644
index 0000000..3668fcc
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/collided_main.py
@@ -0,0 +1,2 @@
+if __name__ == "__main__":
+    run()
\ No newline at end of file
diff --git a/gazelle/python/testdata/binary_without_entrypoint/main.py b/gazelle/python/testdata/binary_without_entrypoint/main.py
new file mode 100644
index 0000000..3668fcc
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/main.py
@@ -0,0 +1,2 @@
+if __name__ == "__main__":
+    run()
\ No newline at end of file
diff --git a/gazelle/python/testdata/binary_without_entrypoint/main2.py b/gazelle/python/testdata/binary_without_entrypoint/main2.py
new file mode 100644
index 0000000..84e642a
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/main2.py
@@ -0,0 +1,2 @@
+if __name__ == "__main__":
+    run()
diff --git a/gazelle/python/testdata/binary_without_entrypoint/main_test.py b/gazelle/python/testdata/binary_without_entrypoint/main_test.py
new file mode 100644
index 0000000..505a766
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/main_test.py
@@ -0,0 +1,7 @@
+import unittest
+
+class TestMain(unittest.unittest):
+    pass
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/gazelle/python/testdata/binary_without_entrypoint/test.yaml b/gazelle/python/testdata/binary_without_entrypoint/test.yaml
new file mode 100644
index 0000000..44e4ae8
--- /dev/null
+++ b/gazelle/python/testdata/binary_without_entrypoint/test.yaml
@@ -0,0 +1,18 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+expect:
+  stderr: |
+    gazelle: failed to generate target "//:collided_main" of kind "py_binary": a target of kind "filegroup" with the same name already exists