feat(fix-update): allow user to profile commands with pprof (#1685)

This adds two flags to the `fix` and the `update` command, `-cpuprofile` and `-memprofile`, which allow users to create output CPU and memory `pprof` files to better understand the performance of Gazelle. This is very helpful in debugging issues like #1688, which show up in certain repository setups.

The implementation mostly followed the instructions on https://pkg.go.dev/runtime/pprof#hdr-Profiling_a_Go_program to set up profiling options for the command in the `cmd/gazelle`.

Tests were added to ensure that `newProfiler` returns a `profiler{}` that doesn't panic, even if either profile path is empty.

Co-authored-by: Fabian Meumertzheim <fabian@meumertzhe.im>
diff --git a/README.rst b/README.rst
index ebd717c..bfa7ca2 100644
--- a/README.rst
+++ b/README.rst
@@ -575,6 +575,20 @@
 |                                                                                                            |
 | By default, all languages that this Gazelle was built with are processed.                                  |
 +-------------------------------------------------------------------+----------------------------------------+
+| :flag:`-cpuprofile filename`                                      | :value:`""`                            |
++-------------------------------------------------------------------+----------------------------------------+
+| If specified, gazelle uses [runtime/pprof](https://pkg.go.dev/runtime/pprof#StartCPUProfile) to collect    |
+| CPU profiling information from the command and save it to the given file.                                  |
+|                                                                                                            |
+| By default, this is disabled                                                                               |
++-------------------------------------------------------------------+----------------------------------------+
+| :flag:`-memprofile filename`                                      | :value:`""`                            |
++-------------------------------------------------------------------+----------------------------------------+
+| If specified, gazelle uses [runtime/pprof](https://pkg.go.dev/runtime/pprof#WriteHeapProfile) to collect   |
+| memory profiling information from the command and save it to the given file.                               |
+|                                                                                                            |
+| By default, this is disabled                                                                               |
++-------------------------------------------------------------------+----------------------------------------+
 
 .. _Predefined plugins: https://github.com/bazelbuild/rules_go/blob/master/proto/core.rst#predefined-plugins
 
diff --git a/cmd/gazelle/BUILD.bazel b/cmd/gazelle/BUILD.bazel
index 464f670..e14a1af 100644
--- a/cmd/gazelle/BUILD.bazel
+++ b/cmd/gazelle/BUILD.bazel
@@ -17,6 +17,7 @@
         "gazelle.go",
         "metaresolver.go",
         "print.go",
+        "profiler.go",
         "update-repos.go",
     ],
     importpath = "github.com/bazelbuild/bazel-gazelle/cmd/gazelle",
@@ -48,6 +49,7 @@
         "fix_test.go",
         "integration_test.go",
         "langs.go",  # keep
+        "profiler_test.go",
     ],
     args = ["-go_sdk=go_sdk"],
     data = ["@go_sdk//:files"],
@@ -76,6 +78,8 @@
         "langs.go",
         "metaresolver.go",
         "print.go",
+        "profiler.go",
+        "profiler_test.go",
         "update-repos.go",
     ],
     visibility = ["//visibility:public"],
diff --git a/cmd/gazelle/fix-update.go b/cmd/gazelle/fix-update.go
index cd20b0d..4c62226 100644
--- a/cmd/gazelle/fix-update.go
+++ b/cmd/gazelle/fix-update.go
@@ -54,6 +54,7 @@
 	patchPath      string
 	patchBuffer    bytes.Buffer
 	print0         bool
+	profile        profiler
 }
 
 type emitFunc func(c *config.Config, f *rule.File) error
@@ -75,6 +76,8 @@
 	recursive      bool
 	knownImports   []string
 	repoConfigPath string
+	cpuProfile     string
+	memProfile     string
 }
 
 func (ucr *updateConfigurer) RegisterFlags(fs *flag.FlagSet, cmd string, c *config.Config) {
@@ -87,6 +90,8 @@
 	fs.BoolVar(&ucr.recursive, "r", true, "when true, gazelle will update subdirectories recursively")
 	fs.StringVar(&uc.patchPath, "patch", "", "when set with -mode=diff, gazelle will write to a file instead of stdout")
 	fs.BoolVar(&uc.print0, "print0", false, "when set with -mode=fix, gazelle will print the names of rewritten files separated with \\0 (NULL)")
+	fs.StringVar(&ucr.cpuProfile, "cpuprofile", "", "write cpu profile to `file`")
+	fs.StringVar(&ucr.memProfile, "memprofile", "", "write memory profile to `file`")
 	fs.Var(&gzflag.MultiFlag{Values: &ucr.knownImports}, "known_import", "import path for which external resolution is skipped (can specify multiple times)")
 	fs.StringVar(&ucr.repoConfigPath, "repo_config", "", "file where Gazelle should load repository configuration. Defaults to WORKSPACE.")
 }
@@ -105,6 +110,11 @@
 	if uc.patchPath != "" && !filepath.IsAbs(uc.patchPath) {
 		uc.patchPath = filepath.Join(c.WorkDir, uc.patchPath)
 	}
+	p, err := newProfiler(ucr.cpuProfile, ucr.memProfile)
+	if err != nil {
+		return err
+	}
+	uc.profile = p
 
 	dirs := fs.Args()
 	if len(dirs) == 0 {
@@ -305,6 +315,12 @@
 	// Visit all directories in the repository.
 	var visits []visitRecord
 	uc := getUpdateConfig(c)
+	defer func() {
+		if err := uc.profile.stop(); err != nil {
+			log.Printf("stopping profiler: %v", err)
+		}
+	}()
+
 	var errorsFromWalk []error
 	walk.Walk(c, cexts, uc.dirs, uc.walkMode, func(dir, rel string, c *config.Config, update bool, f *rule.File, subdirs, regularFiles, genFiles []string) {
 		// If this file is ignored or if Gazelle was not asked to update this
diff --git a/cmd/gazelle/profiler.go b/cmd/gazelle/profiler.go
new file mode 100644
index 0000000..5d6a57c
--- /dev/null
+++ b/cmd/gazelle/profiler.go
@@ -0,0 +1,75 @@
+package main
+
+import (
+	"os"
+	"runtime"
+	"runtime/pprof"
+)
+
+// profiler holds the state needed to finish the profiles requested through
+// the -cpuprofile and -memprofile flags.
+type profiler struct {
+	// cpuProfile is the open file receiving the CPU profile, or nil when CPU
+	// profiling was not requested.
+	cpuProfile *os.File
+	// memProfile is the path the heap profile is written to by stop, or ""
+	// when no memory profile was requested.
+	memProfile string
+}
+
+// newProfiler creates a profiler that writes to the given files. When
+// cpuProfile is non-empty, the file is created and CPU profiling starts
+// immediately; the heap profile named by memProfile is only written by stop.
+// An empty path disables the corresponding profile, so stop does nothing on a
+// profiler created from two empty paths.
+func newProfiler(cpuProfile, memProfile string) (profiler, error) {
+	if cpuProfile == "" {
+		return profiler{
+			memProfile: memProfile,
+		}, nil
+	}
+
+	f, err := os.Create(cpuProfile)
+	if err != nil {
+		return profiler{}, err
+	}
+	if err := pprof.StartCPUProfile(f); err != nil {
+		// Profiling did not start, so stop would never get to close f.
+		f.Close()
+		return profiler{}, err
+	}
+
+	return profiler{
+		cpuProfile: f,
+		memProfile: memProfile,
+	}, nil
+}
+
+// stop ends CPU profiling if it was started and writes the heap profile if
+// one was requested. Both steps are attempted even when closing the CPU
+// profile fails; an error from the heap profile takes precedence over it.
+func (p *profiler) stop() error {
+	var cpuErr error
+	if p.cpuProfile != nil {
+		pprof.StopCPUProfile()
+		cpuErr = p.cpuProfile.Close()
+	}
+
+	if p.memProfile == "" {
+		return cpuErr
+	}
+
+	f, err := os.Create(p.memProfile)
+	if err != nil {
+		return err
+	}
+	runtime.GC() // get up-to-date statistics
+	if err := pprof.WriteHeapProfile(f); err != nil {
+		f.Close()
+		return err
+	}
+	if err := f.Close(); err != nil {
+		return err
+	}
+	return cpuErr
+}
diff --git a/cmd/gazelle/profiler_test.go b/cmd/gazelle/profiler_test.go
new file mode 100644
index 0000000..e9d0706
--- /dev/null
+++ b/cmd/gazelle/profiler_test.go
@@ -0,0 +1,76 @@
+package main
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// TestEmptyProfiler checks that newProfiler and stop succeed when the CPU
+// path, the memory path, or both are left empty.
+func TestEmptyProfiler(t *testing.T) {
+	dir := t.TempDir()
+	tests := []struct {
+		name       string
+		cpuProfile string
+		memProfile string
+	}{
+		{
+			name:       "cpuProfile",
+			cpuProfile: filepath.Join(dir, "cpu.prof"),
+		},
+		{
+			name:       "memProfile",
+			memProfile: filepath.Join(dir, "mem.prof"),
+		},
+		{
+			name: "empty",
+		},
+	}
+
+	for _, test := range tests {
+		// Name each subtest after its case so a failure identifies it.
+		t.Run(test.name, func(t *testing.T) {
+			p, err := newProfiler(test.cpuProfile, test.memProfile)
+			if err != nil {
+				t.Fatalf("newProfiler failed: %v", err)
+			}
+			if err := p.stop(); err != nil {
+				t.Fatalf("stop failed: %v", err)
+			}
+		})
+	}
+}
+
+// TestProfiler checks that enabling both profiles populates the profiler and
+// that stop leaves both profile files on disk.
+func TestProfiler(t *testing.T) {
+	// t.TempDir is removed automatically when the test finishes.
+	dir := t.TempDir()
+	cpuProfileName := filepath.Join(dir, "cpu.prof")
+	memProfileName := filepath.Join(dir, "mem.prof")
+
+	p, err := newProfiler(cpuProfileName, memProfileName)
+	if err != nil {
+		t.Fatalf("newProfiler failed: %v", err)
+	}
+	if p.cpuProfile == nil {
+		t.Fatal("Expected cpuProfile to be non-nil")
+	}
+	if p.memProfile != memProfileName {
+		t.Fatalf("Expected memProfile to be %s, got %s", memProfileName, p.memProfile)
+	}
+
+	if err := p.stop(); err != nil {
+		t.Fatalf("stop failed: %v", err)
+	}
+
+	// Any Stat error, not only "not exist", means the profile is unusable.
+	if _, err := os.Stat(cpuProfileName); err != nil {
+		t.Fatalf("CPU profile file %s was not created: %v", cpuProfileName, err)
+	}
+
+	if _, err := os.Stat(memProfileName); err != nil {
+		t.Fatalf("Memory profile file %s was not created: %v", memProfileName, err)
+	}
+}
diff --git a/internal/go_repository_tools_srcs.bzl b/internal/go_repository_tools_srcs.bzl
index 42dc723..b135dd2 100644
--- a/internal/go_repository_tools_srcs.bzl
+++ b/internal/go_repository_tools_srcs.bzl
@@ -19,6 +19,7 @@
     Label("//cmd/gazelle:langs.go"),
     Label("//cmd/gazelle:metaresolver.go"),
     Label("//cmd/gazelle:print.go"),
+    Label("//cmd/gazelle:profiler.go"),
     Label("//cmd/gazelle:update-repos.go"),
     Label("//cmd/generate_repo_config:BUILD.bazel"),
     Label("//cmd/generate_repo_config:generate_repo_config.go"),