blob: d2f989b0a098b70df3412b5ca36feea900df906f [file] [log] [blame] [edit]
package walk
import (
"errors"
"os"
"path"
"path/filepath"
"sync"
"github.com/bazelbuild/bazel-gazelle/rule"
)
// DirInfo describes a single directory: everything Walk2 needs to know
// about it.
//
// The contents of the Subdirs, RegularFiles, and GenFiles slices must not be
// modified.
type DirInfo struct {
	// Subdirs holds the names of subdirectories that are not ignored or
	// excluded.
	Subdirs []string

	// RegularFiles holds the names of regular files that are not ignored or
	// excluded.
	RegularFiles []string

	// GenFiles is a list of generated files, named in "out" or "outs"
	// attributes of targets in the directory's build file.
	GenFiles []string

	// File is the directory's build file. It may be nil if the build file
	// doesn't exist or contains errors.
	File *rule.File

	// config is the configuration used by Configurer. It may be precomputed
	// before Configure is called so that directory traversal can be
	// parallelized without visiting excluded subdirectories.
	config *walkConfig
}
// loadDirInfo gathers the DirInfo for the directory at rel, a slash-separated
// path relative to the repo root ("" denotes the repo root itself).
//
// Do not call this method directly. Go through w.cache.get so the same
// directory is not read more than once.
//
// Unless rel is "", the parent directory must already have been loaded and
// its result stored in the cache: the parent's configuration is read from the
// cache and used as the starting point for this directory.
//
// A non-nil error does not imply an empty result. Errors from reading the
// directory and from loading the build file are collected and joined, and
// whatever information could be gathered is still returned. For example, if
// the build file contains a syntax error, the directory's contents are still
// reported.
func (w *walker) loadDirInfo(rel string) (DirInfo, error) {
	var (
		info DirInfo
		errs []error
	)

	dir := filepath.Join(w.rootConfig.RepoRoot, rel)
	entries, err := os.ReadDir(dir)
	if err != nil {
		errs = append(errs, err)
	}

	// Choose the configuration to inherit from: the root configuration for
	// the repo root, otherwise whatever the cache holds for the parent.
	var parentConfig *walkConfig
	switch rel {
	case "":
		parentConfig = getWalkConfig(w.rootConfig)
	default:
		parentRel := path.Dir(rel)
		if parentRel == "." {
			// path.Dir reports "." for a top-level directory; the repo root
			// is keyed as "".
			parentRel = ""
		}
		// The second result is ignored: the parent is required to have been
		// loaded already (see the function comment).
		parentInfo, _ := w.cache.getLoaded(parentRel)
		parentConfig = parentInfo.config
	}

	if info.File, err = loadBuildFile(parentConfig, w.rootConfig.ReadBuildFilesDir, rel, dir, entries); err != nil {
		errs = append(errs, err)
	}

	info.config = configureForWalk(parentConfig, rel, info.File)
	if info.config.isExcludedDir(rel) {
		// The build file excludes this very directory, so its contents are
		// ignored.
		entries = nil
	}

	// Sort each remaining entry into Subdirs or RegularFiles, dropping
	// anything the configuration excludes.
	for _, entry := range entries {
		entryRel := path.Join(rel, entry.Name())
		resolved := maybeResolveSymlink(info.config, dir, entryRel, entry)
		if resolved.IsDir() {
			if !info.config.isExcludedDir(entryRel) {
				info.Subdirs = append(info.Subdirs, resolved.Name())
			}
			continue
		}
		if !info.config.isExcludedFile(entryRel) {
			info.RegularFiles = append(info.RegularFiles, resolved.Name())
		}
	}

	info.GenFiles = findGenFiles(info.config, info.File)

	// Trim every slice's capacity down to its length. This is defensive: a
	// caller that appends is forced onto a fresh backing array, so multiple
	// callers cannot overwrite each other through a shared one.
	clip := func(s []string) []string { return s[:len(s):len(s)] }
	info.RegularFiles = clip(info.RegularFiles)
	info.Subdirs = clip(info.Subdirs)
	info.GenFiles = clip(info.GenFiles)

	return info, errors.Join(errs...)
}
// populateCache warms the directory cache with a parallel traversal of the
// tree, starting at the repo root. It has no semantic effect; it exists only
// to speed up I/O.
//
// populateCache should only be called when recursion is enabled. It avoids
// traversing excluded subdirectories. A directory whose load reports an error
// is not descended into by this traversal.
func (w *walker) populateCache() {
	// maxInFlight is the capacity of the semaphore below.
	const maxInFlight = 6

	// tokens is a semaphore. Sending struct{}{} acquires it and grants
	// permission to spawn a goroutine that visits one subdirectory.
	//
	// Every visit gives its own token back as soon as its directory has been
	// loaded, and only afterwards acquires tokens for its children. Holding
	// on to the token while waiting for children could deadlock on a deeply
	// nested chain of directories.
	tokens := make(chan struct{}, maxInFlight)
	var pending sync.WaitGroup

	var visit func(rel string)
	visit = func(rel string) {
		info, err := w.cache.get(rel, w.loadDirInfo)
		<-tokens // release the token held on behalf of this visit
		if err != nil {
			return
		}

		for _, name := range info.Subdirs {
			child := path.Join(rel, name)
			// Skip subdirectories that should not be visited.
			if !w.shouldVisit(child, true) {
				continue
			}
			tokens <- struct{}{} // acquire a token for the child
			pending.Add(1)
			go func(rel string) {
				defer pending.Done()
				visit(rel)
			}(child)
		}
	}

	// The root is visited on the calling goroutine, but it still takes a
	// token first because visit always releases one.
	tokens <- struct{}{}
	visit("")
	pending.Wait()
}