build(gazelle): embed Python zip file (#1485)

The runtime dependencies of the Gazelle Python extension make it hard to
distribute Gazelle binaries: we have to preserve the runfiles structure
and distribute it with the Gazelle binaries.

Instead, we can build a single Python zip file that comes with a built-in
interpreter, and embed the zip file into the Go binary at compile time
to avoid the runtime dependency.

Fixes #1455

---------

Co-authored-by: Ignas Anikevicius <240938+aignas@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e13868a..ddfed3f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,7 @@
 * Make `//python/pip_install:pip_repository_bzl` `bzl_library` target internal
   as all of the publicly available symbols (etc. `package_annotation`) are
   re-exported via `//python:pip_bzl` `bzl_library`.
+* Gazelle Python extension no longer has runtime dependencies. Using `GAZELLE_PYTHON_RUNTIME_DEPS` from `@rules_python_gazelle_plugin//:def.bzl` is no longer necessary.
 
 ### Fixed
 
diff --git a/examples/build_file_generation/BUILD.bazel b/examples/build_file_generation/BUILD.bazel
index 79f6251..a03af54 100644
--- a/examples/build_file_generation/BUILD.bazel
+++ b/examples/build_file_generation/BUILD.bazel
@@ -6,7 +6,6 @@
 load("@pip//:requirements.bzl", "all_whl_requirements")
 load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test")
 load("@rules_python//python:pip.bzl", "compile_pip_requirements")
-load("@rules_python_gazelle_plugin//:def.bzl", "GAZELLE_PYTHON_RUNTIME_DEPS")
 load("@rules_python_gazelle_plugin//manifest:defs.bzl", "gazelle_python_manifest")
 load("@rules_python_gazelle_plugin//modules_mapping:def.bzl", "modules_mapping")
 
@@ -56,7 +55,6 @@
 # See https://github.com/bazelbuild/bazel-gazelle/blob/master/extend.rst#example
 gazelle(
     name = "gazelle",
-    data = GAZELLE_PYTHON_RUNTIME_DEPS,
     gazelle = "@rules_python_gazelle_plugin//python:gazelle_binary",
 )
 
diff --git a/examples/bzlmod_build_file_generation/BUILD.bazel b/examples/bzlmod_build_file_generation/BUILD.bazel
index 9b2e5bd..67288d6 100644
--- a/examples/bzlmod_build_file_generation/BUILD.bazel
+++ b/examples/bzlmod_build_file_generation/BUILD.bazel
@@ -9,7 +9,6 @@
 load("@pip//:requirements.bzl", "all_whl_requirements")
 load("@rules_python//python:defs.bzl", "py_binary", "py_library", "py_test")
 load("@rules_python//python:pip.bzl", "compile_pip_requirements")
-load("@rules_python_gazelle_plugin//:def.bzl", "GAZELLE_PYTHON_RUNTIME_DEPS")
 load("@rules_python_gazelle_plugin//manifest:defs.bzl", "gazelle_python_manifest")
 load("@rules_python_gazelle_plugin//modules_mapping:def.bzl", "modules_mapping")
 
@@ -70,7 +69,6 @@
 # See: https://github.com/bazelbuild/bazel-gazelle#fix-and-update
 gazelle(
     name = "gazelle",
-    data = GAZELLE_PYTHON_RUNTIME_DEPS,
     gazelle = "@rules_python_gazelle_plugin//python:gazelle_binary",
 )
 
diff --git a/gazelle/README.md b/gazelle/README.md
index b8be32f..c32f0d8 100644
--- a/gazelle/README.md
+++ b/gazelle/README.md
@@ -7,7 +7,9 @@
 
 This directory contains a plugin for
 [Gazelle](https://github.com/bazelbuild/bazel-gazelle)
-that generates BUILD files content for Python code.
+that generates BUILD files content for Python code. When Gazelle is run as a command line tool with this plugin, it embeds a Python interpreter resolved during the plugin build.
+The behavior of the plugin is slightly different with different versions of the interpreter as the Python `stdlib` changes with every minor version release.
+Distributors of Gazelle binaries should, therefore, build a Gazelle binary for each OS+CPU architecture+Minor Python version combination they are targeting.
 
 The following instructions are for when you use [bzlmod](https://docs.bazel.build/versions/5.0.0/bzlmod.html).
 Please refer to older documentation that includes instructions on how to use Gazelle
@@ -125,7 +127,6 @@
 
 ```starlark
 load("@bazel_gazelle//:def.bzl", "gazelle")
-load("@rules_python_gazelle_plugin//:def.bzl", "GAZELLE_PYTHON_RUNTIME_DEPS")
 
 # Our gazelle target points to the python gazelle binary.
 # This is the simple case where we only need one language supported.
@@ -134,7 +135,6 @@
 # See https://github.com/bazelbuild/bazel-gazelle/blob/master/extend.rst#example
 gazelle(
     name = "gazelle",
-    data = GAZELLE_PYTHON_RUNTIME_DEPS,
     gazelle = "@rules_python_gazelle_plugin//python:gazelle_binary",
 )
 ```
diff --git a/gazelle/def.bzl b/gazelle/def.bzl
index 80b1157..084b5a4 100644
--- a/gazelle/def.bzl
+++ b/gazelle/def.bzl
@@ -16,6 +16,4 @@
 """
 
 GAZELLE_PYTHON_RUNTIME_DEPS = [
-    "@rules_python_gazelle_plugin//python:parse",
-    "@rules_python_gazelle_plugin//python:std_modules",
 ]
diff --git a/gazelle/python/BUILD.bazel b/gazelle/python/BUILD.bazel
index 4cb755d..507d69e 100644
--- a/gazelle/python/BUILD.bazel
+++ b/gazelle/python/BUILD.bazel
@@ -16,10 +16,7 @@
         "std_modules.go",
         "target.go",
     ],
-    data = [
-        ":parse",
-        ":std_modules",
-    ],
+    embedsrcs = [":helper.zip"],
     importpath = "github.com/bazelbuild/rules_python/gazelle/python",
     visibility = ["//visibility:public"],
     deps = [
@@ -36,20 +33,24 @@
         "@com_github_emirpasic_gods//lists/singlylinkedlist",
         "@com_github_emirpasic_gods//sets/treeset",
         "@com_github_emirpasic_gods//utils",
-        "@io_bazel_rules_go//go/runfiles",
     ],
 )
 
 py_binary(
-    name = "parse",
-    srcs = ["parse.py"],
+    name = "helper",
+    srcs = [
+        "__main__.py",
+        "parse.py",
+        "std_modules.py",
+    ],
+    main = "__main__.py",
     visibility = ["//visibility:public"],
 )
 
-py_binary(
-    name = "std_modules",
-    srcs = ["std_modules.py"],
-    visibility = ["//visibility:public"],
+filegroup(
+    name = "helper.zip",
+    srcs = [":helper"],
+    output_group = "python_zip_file",
 )
 
 go_test(
@@ -57,12 +58,12 @@
     srcs = ["python_test.go"],
     data = [
         ":gazelle_binary",
-        ":parse",
-        ":std_modules",
+        ":helper",
     ] + glob(["testdata/**"]),
     deps = [
         "@bazel_gazelle//testtools:go_default_library",
         "@com_github_ghodss_yaml//:yaml",
+        "@io_bazel_rules_go//go/runfiles:go_default_library",
         "@io_bazel_rules_go//go/tools/bazel:go_default_library",
     ],
 )
diff --git a/gazelle/python/__main__.py b/gazelle/python/__main__.py
new file mode 100644
index 0000000..2f5a4a1
--- /dev/null
+++ b/gazelle/python/__main__.py
@@ -0,0 +1,31 @@
+# Copyright 2023 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The parse subcommand is a long-living program that communicates over STDIN
+# and STDOUT. STDIN receives parse requests, one per line. It outputs the parsed
+# modules and comments from all the files from each request.
+
+import parse
+import std_modules
+import sys
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        sys.exit("Please provide subcommand, either parse or std_modules")
+    if sys.argv[1] == "parse":
+        sys.exit(parse.main(sys.stdin, sys.stdout))
+    elif sys.argv[1] == "std_modules":
+        sys.exit(std_modules.main(sys.stdin, sys.stdout))
+    else:
+        sys.exit("Unknown subcommand: " + sys.argv[1])
diff --git a/gazelle/python/lifecycle.go b/gazelle/python/lifecycle.go
index 592b322..6d628e9 100644
--- a/gazelle/python/lifecycle.go
+++ b/gazelle/python/lifecycle.go
@@ -16,14 +16,37 @@
 
 import (
 	"context"
+	_ "embed"
 	"github.com/bazelbuild/bazel-gazelle/language"
+	"log"
+	"os"
+)
+
+var (
+	//go:embed helper.zip
+	helperZip  []byte
+	helperPath string
 )
 
 type LifeCycleManager struct {
 	language.BaseLifecycleManager
+	pyzFilePath string
 }
 
 func (l *LifeCycleManager) Before(ctx context.Context) {
+	helperPath = os.Getenv("GAZELLE_PYTHON_HELPER")
+	if helperPath == "" {
+		pyzFile, err := os.CreateTemp("", "python_zip_")
+		if err != nil {
+			log.Fatalf("failed to write parser zip: %v", err)
+		}
+		defer pyzFile.Close()
+		helperPath = pyzFile.Name()
+		l.pyzFilePath = helperPath
+		if _, err := pyzFile.Write(helperZip); err != nil {
+			log.Fatalf("cannot write %q: %v", helperPath, err)
+		}
+	}
 	startParserProcess(ctx)
 	startStdModuleProcess(ctx)
 }
@@ -34,4 +57,7 @@
 
 func (l *LifeCycleManager) AfterResolvingDeps(ctx context.Context) {
 	shutdownStdModuleProcess()
+	if l.pyzFilePath != "" {
+		os.Remove(l.pyzFilePath)
+	}
 }
diff --git a/gazelle/python/parser.go b/gazelle/python/parser.go
index 60a3c24..ad55e03 100644
--- a/gazelle/python/parser.go
+++ b/gazelle/python/parser.go
@@ -17,6 +17,7 @@
 import (
 	"bufio"
 	"context"
+	_ "embed"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -26,7 +27,6 @@
 	"strings"
 	"sync"
 
-	"github.com/bazelbuild/rules_go/go/runfiles"
 	"github.com/emirpasic/gods/sets/treeset"
 	godsutils "github.com/emirpasic/gods/utils"
 )
@@ -38,21 +38,9 @@
 )
 
 func startParserProcess(ctx context.Context) {
-	rfiles, err := runfiles.New()
-	if err != nil {
-		log.Printf("failed to create a runfiles object: %v\n", err)
-		os.Exit(1)
-	}
-
-	parseScriptRunfile, err := rfiles.Rlocation("rules_python_gazelle_plugin/python/parse")
-	if err != nil {
-		log.Printf("failed to initialize parser: %v\n", err)
-		os.Exit(1)
-	}
-
-	cmd := exec.CommandContext(ctx, parseScriptRunfile)
-	cmd.Env = append(os.Environ(), rfiles.Env()...)
-
+	// due to #691, we need a system interpreter to bootstrap, part of which is
+	// to locate the hermetic interpreter.
+	cmd := exec.CommandContext(ctx, "python3", helperPath, "parse")
 	cmd.Stderr = os.Stderr
 
 	stdin, err := cmd.StdinPipe()
diff --git a/gazelle/python/python_test.go b/gazelle/python/python_test.go
index 79450ad..74bd85b 100644
--- a/gazelle/python/python_test.go
+++ b/gazelle/python/python_test.go
@@ -31,6 +31,7 @@
 	"time"
 
 	"github.com/bazelbuild/bazel-gazelle/testtools"
+	"github.com/bazelbuild/rules_go/go/runfiles"
 	"github.com/bazelbuild/rules_go/go/tools/bazel"
 	"github.com/ghodss/yaml"
 )
@@ -159,6 +160,11 @@
 		cmd.Stdout = &stdout
 		cmd.Stderr = &stderr
 		cmd.Dir = workspaceRoot
+		helperScript, err := runfiles.Rlocation("rules_python_gazelle_plugin/python/helper")
+		if err != nil {
+			t.Fatalf("failed to initialize Python helper: %v", err)
+		}
+		cmd.Env = append(os.Environ(), "GAZELLE_PYTHON_HELPER="+helperScript)
 		if err := cmd.Run(); err != nil {
 			var e *exec.ExitError
 			if !errors.As(err, &e) {
diff --git a/gazelle/python/std_modules.go b/gazelle/python/std_modules.go
index a87deec..dd59cd8 100644
--- a/gazelle/python/std_modules.go
+++ b/gazelle/python/std_modules.go
@@ -17,6 +17,7 @@
 import (
 	"bufio"
 	"context"
+	_ "embed"
 	"fmt"
 	"io"
 	"log"
@@ -25,8 +26,6 @@
 	"strconv"
 	"strings"
 	"sync"
-
-	"github.com/bazelbuild/rules_go/go/runfiles"
 )
 
 var (
@@ -39,23 +38,12 @@
 func startStdModuleProcess(ctx context.Context) {
 	stdModulesSeen = make(map[string]struct{})
 
-	rfiles, err := runfiles.New()
-	if err != nil {
-		log.Printf("failed to create a runfiles object: %v\n", err)
-		os.Exit(1)
-	}
-
-	stdModulesScriptRunfile, err := rfiles.Rlocation("rules_python_gazelle_plugin/python/std_modules")
-	if err != nil {
-		log.Printf("failed to initialize std_modules: %v\n", err)
-		os.Exit(1)
-	}
-
-	cmd := exec.CommandContext(ctx, stdModulesScriptRunfile)
-
+	// due to #691, we need a system interpreter to bootstrap, part of which is
+	// to locate the hermetic interpreter.
+	cmd := exec.CommandContext(ctx, "python3", helperPath, "std_modules")
 	cmd.Stderr = os.Stderr
 	// All userland site-packages should be ignored.
-	cmd.Env = append([]string{"PYTHONNOUSERSITE=1"}, rfiles.Env()...)
+	cmd.Env = []string{"PYTHONNOUSERSITE=1"}
 
 	stdin, err := cmd.StdinPipe()
 	if err != nil {