// blob: 4fd1bbb63d39889bceafe15a136db22b8b17825a
/* Copyright 2018 The Bazel Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package walk provides customizable functionality for visiting each
// subdirectory in a directory tree.
package walk
import (
"errors"
"io/fs"
"log"
"os"
"path"
"path/filepath"
"strings"
"github.com/bazelbuild/bazel-gazelle/config"
"github.com/bazelbuild/bazel-gazelle/pathtools"
"github.com/bazelbuild/bazel-gazelle/rule"
)
// Mode determines which directories Walk and Walk2 visit and which
// directories should be updated.
type Mode int
const (
// In VisitAllUpdateSubdirsMode, Walk visits every directory in the
// repository. The directories given to Walk and their subdirectories are
// updated. This is the zero value of Mode.
VisitAllUpdateSubdirsMode Mode = iota
// In VisitAllUpdateDirsMode, Walk visits every directory in the repository.
// Only the directories given to Walk are updated (not their subdirectories).
VisitAllUpdateDirsMode
// In UpdateDirsMode, Walk only visits the directories given to Walk and
// their parent directories. Only the directories given to Walk are updated.
// Build files in parent directories are read in order to produce a complete
// configuration; the callback is still called in parent directories, but
// with update set to false.
UpdateDirsMode
// In UpdateSubdirsMode, Walk visits and updates the directories given to Walk
// and their subdirectories. Build files in parent directories are read in
// order to produce a complete configuration; the callback is still called in
// parent directories, but with update set to false.
UpdateSubdirsMode
)
// WalkFunc is a callback called by Walk in each visited directory.
//
// dir is the absolute file system path to the directory being visited.
//
// rel is the relative slash-separated path to the directory from the
// repository root. Will be "" for the repository root directory itself.
//
// c is the configuration for the current directory. This may have been
// modified by directives in the directory's build file.
//
// update is true when the build file may be updated.
//
// f is the existing build file in the directory. Will be nil if there
// was no file.
//
// subdirs is a list of base names of subdirectories within dir, not
// including excluded directories.
//
// regularFiles is a list of base names of regular files within dir, not
// including excluded files or symlinks.
//
// genFiles is a list of names of generated files, found by reading
// "out" and "outs" attributes of rules in f.
//
// Deprecated: Use Walk2Func with Walk2 instead.
type WalkFunc func(dir, rel string, c *config.Config, update bool, f *rule.File, subdirs, regularFiles, genFiles []string)
// Walk traverses the directory tree rooted at c.RepoRoot, visiting
// subdirectories in depth-first post-order. It is a thin adapter around
// Walk2 that accepts the older WalkFunc callback signature.
//
// When Walk visits a directory, it lists the files and subdirectories within
// that directory. If a build file is present, Walk reads the build file and
// applies any directives to the configuration (a copy of the parent directory's
// configuration is made, and the copy is modified). After visiting
// subdirectories, the callback wf may be called, depending on the mode.
//
// c is the root configuration to start with. This includes changes made by
// command line flags, but not by the root build file. This configuration
// should not be modified.
//
// cexts is a list of configuration extensions. When visiting a directory,
// before visiting subdirectories, Walk makes a copy of the parent configuration
// and calls Configure for each extension on the copy. If Walk sees a directive
// that is not listed in KnownDirectives of any extension, an error will
// be logged.
//
// dirs is a list of absolute, canonical file system paths of directories
// to visit.
//
// mode determines whether subdirectories of dirs should be visited recursively,
// when the wf callback should be called, and when the "update" argument
// to the wf callback should be set.
//
// wf is a function that may be called in each directory.
//
// Walk has no error return: any error reported by Walk2 is logged, and when
// c.Strict is set the process is then terminated with log.Fatal.
//
// Deprecated: Use Walk2 instead.
func Walk(c *config.Config, cexts []config.Configurer, dirs []string, mode Mode, wf WalkFunc) {
	// Adapt the legacy callback: unpack the argument struct and report an
	// empty result, since WalkFunc cannot return errors or extra directories.
	adapter := func(a Walk2FuncArgs) Walk2FuncResult {
		wf(a.Dir, a.Rel, a.Config, a.Update, a.File, a.Subdirs, a.RegularFiles, a.GenFiles)
		var none Walk2FuncResult
		return none
	}

	walkErr := Walk2(c, cexts, dirs, mode, adapter)
	if walkErr == nil {
		return
	}
	log.Print(walkErr)
	if c.Strict {
		log.Fatal("Exit as strict mode is on")
	}
}
// Walk2Func is a callback called by Walk2 in each visited directory. It
// receives information about the directory in args and returns a
// Walk2FuncResult, which may carry an error and a list of additional
// directories to visit.
type Walk2Func func(args Walk2FuncArgs) Walk2FuncResult
// Walk2FuncArgs holds the arguments Walk2 passes to a Walk2Func for one
// visited directory.
type Walk2FuncArgs struct {
// Dir is the absolute file system path to the directory being visited.
Dir string
// Rel is the relative slash-separated path to the directory from the
// repository root. Will be "" for the repository root directory itself.
Rel string
// Config is the configuration for the current directory. This may have been
// modified by directives in the directory's build file.
Config *config.Config
// Update is true when the build file may be updated.
Update bool
// File is the existing build file in the directory. Will be nil if there
// was no file.
File *rule.File
// Subdirs is a list of names of subdirectories within dir, not
// including excluded directories. A directory is listed here regardless of
// whether the subdirectory contains (or will contain) a build file.
// If the update_only generation mode is enabled, this list also contains
// recursive subdirectories, up to and including those at the edge of the
// same Bazel package. Names of recursive subdirectories are slash-separated
// paths relative to Dir.
Subdirs []string
// RegularFiles is a list of names of regular files within dir, not
// including excluded files. Symbolic links to files and non-followed
// directories are included in this list. If the update_only generation mode
// is enabled, this list also contains files from recursive subdirectories
// within the same Bazel package (those that can be matched by glob), named
// by slash-separated paths relative to Dir.
RegularFiles []string
// GenFiles is a list of names of generated files, found by reading
// "out" and "outs" attributes of rules in File.
GenFiles []string
}
// Walk2FuncResult is the value a Walk2Func returns to Walk2 after handling
// one directory.
type Walk2FuncResult struct {
// Err is an error encountered by the callback function. Walk2 collects it
// together with any other errors and returns them joined. When
// Config.Strict is set, setting Err causes Walk2 to return early.
Err error
// RelsToVisit is a list of additional directories to visit. Each directory is
// a slash-separated path, relative to the repository root or "" for the root
// directory itself. Paths are cleaned before use.
//
// These directories will be visited after the directories the walk was
// already going to visit. They will not be visited more than once in total.
// When one of these directories is visited, the Walk2FuncArgs.Update flag
// will be false unless the directory was already going to be visited with
// the Update flag true as part of the walk.
//
// This list may contain non-existent directories.
RelsToVisit []string
}
// Walk2 traverses a limited part of the directory tree rooted at c.RepoRoot
// and calls the function wf in each visited directory.
//
// The dirs and mode parameters determine which directories Walk2 visits.
// Walk2 calls wf in each directory in dirs with the Walk2FuncArgs.Update
// flag set to true. This indicates Gazelle should update build files in that
// directory. Depending on the mode flag, Walk2 may additionally visit
// subdirectories or all directories in the repo, possibly with the Update
// flag set.
//
// Some directives like "# gazelle:exclude" and files like .bazelignore
// control the traversal, excluding certain files and directories.
//
// The traversal is done in post-order, but configuration directives are always
// applied from build files in parent directories first. Concretely, this means
// that language.Configurer.Configure is called on each extension in cexts in a
// directory *before* visiting its subdirectories; wf is called in a directory
// *after* its subdirectories.
//
// After the main walk, Walk2 visits any directories the callback requested
// through Walk2FuncResult.RelsToVisit, together with their not-yet-visited
// parent directories, without recursing into subdirectories.
//
// Walk2 returns an error if the walker could not be created; otherwise it
// returns all errors collected during the walk, joined with errors.Join (nil
// if there were none). When c.Strict is set, Walk2 stops at the first error.
func Walk2(c *config.Config, cexts []config.Configurer, dirs []string, mode Mode, wf Walk2Func) error {
	w, err := newWalker(c, cexts, dirs, mode, wf)
	if err != nil {
		return err
	}
	cleanup := setGlobalWalker(w)
	defer cleanup()

	// Do the main tree walk, visiting directories the user requested.
	w.visit(c, "", false)
	if c.Strict && len(w.errs) > 0 {
		return errors.Join(w.errs...)
	}

	// Visit additional directories that extensions requested for indexing.
	// Don't visit subdirectories recursively, even when recursion is enabled.
	w.mode = UpdateDirsMode
	for len(w.relsToVisit) > 0 {
		// Don't simply range over relsToVisit. We may append more.
		relToVisit := w.relsToVisit[0]
		w.relsToVisit = w.relsToVisit[1:]

		// Make sure to visit prefixes of relToVisit as well so we apply
		// configuration directives.
		pathtools.Prefixes(relToVisit)(func(rel string) bool {
			if _, visited := w.visits[rel]; visited {
				// Already handled during the main walk or an earlier request.
				return true
			}

			// Never visited this directory. Derive its configuration from the
			// parent, which was visited by an earlier iteration or by the main
			// walk.
			parentRel := path.Dir(rel)
			if parentRel == "." {
				parentRel = ""
			}
			parentCfg := w.visits[parentRel].c
			if getWalkConfig(parentCfg).isExcludedDir(rel) {
				return false
			}
			if _, err := w.cache.get(rel, w.loadDirInfo); err != nil {
				// Error loading directory. Most commonly, this is because the
				// directory doesn't exist, but it could actually be a file
				// or we don't have permission, or some other I/O error.
				// Skip it.
				return false
			}
			cfg := parentCfg.Clone()
			w.visit(cfg, rel, false)
			if cfg.Strict && len(w.errs) > 0 {
				return false
			}
			return true
		})

		// In strict mode, stop processing further requested directories after
		// the first error instead of only abandoning the current path.
		if c.Strict && len(w.errs) > 0 {
			break
		}
	}
	return errors.Join(w.errs...)
}
// walker holds state needed for a walk of the source tree. A walker is
// created by newWalker and driven by Walk2.
type walker struct {
// repoRoot is the absolute file path to the repo's root directory.
repoRoot string
// rootConfig is the configuration for the repo root directory.
rootConfig *config.Config
// cache provides access to directory information.
cache *cache
// cexts is a list of configuration extensions, provided by the caller.
cexts []config.Configurer
// knownDirectives is a set of directives supported by those extensions.
knownDirectives map[string]bool
// mode determines how directories are visited, provided by the caller.
// Walk2 switches it to UpdateDirsMode after the main walk, before visiting
// relsToVisit, so those visits do not recurse into subdirectories.
mode Mode
// shouldUpdateRel indicates whether we should update a set of directories
// named by slash-separated repo-root-relative paths. The set is generated
// from the list of directories passed in to Walk2. This map contains true
// for explicitly listed directories, and false for ancestor directories
// that are not explicitly listed.
shouldUpdateRel map[string]bool
// wf is the callback provided by the caller. It's called in each directory
// that needs to be updated or indexed, determined by mode.
wf Walk2Func
// visits holds a record of each time visit was called, keyed by
// slash-separated repo-root-relative path. It prevents visiting
// the same directory more than once and tracks information that's needed
// by parents.
visits map[string]visitInfo
// relsToVisit is a queue of slash-separated repo-root-relative paths to
// additional directories to visit, requested by the callback through
// Walk2FuncResult.RelsToVisit. These directories are not visited
// recursively. wf is called with Walk2FuncArgs.Update false.
relsToVisit []string
// relsToVisitSeen indicates whether a string was added to relsToVisit.
// It's used to avoid appending a path more than once.
relsToVisitSeen map[string]struct{}
// errs is a list of errors encountered while walking the directory tree,
// including errors returned by the callback. If the Config.Strict flag is
// set in the root configuration, we return quickly after the first error.
errs []error
}
// visitInfo records what visit learned about one directory. Entries are
// stored in walker.visits, keyed by the directory's repo-root-relative path.
type visitInfo struct {
// containedByParent is true if the directory does not (and should not)
// contain a build file. The parent directory may use regularFiles
// and subdirs.
containedByParent bool
// c is the configuration that visit was called with for this directory.
// It is cloned to produce the configuration of each subdirectory.
c *config.Config
// regularFiles and subdirs are the directory's own entries as loaded from
// the cache, recorded before entries from contained subdirectories are
// merged in.
regularFiles, subdirs []string
}
// newWalker builds the walker used by Walk2.
//
// It gathers the directives known to the extensions in cexts and converts
// each directory in dirs into a slash-separated path relative to c.RepoRoot
// ("" for the root itself). Each such path is recorded in shouldUpdateRel
// with the value true; each of its proper ancestor prefixes is recorded with
// the value false unless it already has an entry.
//
// newWalker returns an error if a directory cannot be made relative to
// c.RepoRoot. On success, it starts a goroutine that populates the walker's
// cache in the background.
func newWalker(c *config.Config, cexts []config.Configurer, dirs []string, mode Mode, wf Walk2Func) (*walker, error) {
	known := map[string]bool{}
	for _, ext := range cexts {
		for _, directive := range ext.KnownDirectives() {
			known[directive] = true
		}
	}

	toUpdate := map[string]bool{}
	for _, dir := range dirs {
		rel, err := filepath.Rel(c.RepoRoot, dir)
		if err != nil {
			return nil, err
		}
		if rel = filepath.ToSlash(rel); rel == "." {
			rel = ""
		}

		// Register every ancestor prefix (the text before each '/') without
		// overriding an entry that was explicitly requested earlier.
		for start := 0; ; {
			off := strings.IndexByte(rel[start:], '/')
			if off < 0 {
				break
			}
			ancestor := rel[:start+off]
			if _, seen := toUpdate[ancestor]; !seen {
				toUpdate[ancestor] = false
			}
			start += off + 1
		}
		// The directory itself is always marked for update.
		toUpdate[rel] = true
	}

	w := &walker{
		repoRoot:        c.RepoRoot,
		rootConfig:      c,
		cache:           new(cache),
		cexts:           cexts,
		knownDirectives: known,
		mode:            mode,
		shouldUpdateRel: toUpdate,
		wf:              wf,
		visits:          map[string]visitInfo{},
		relsToVisitSeen: map[string]struct{}{},
	}

	// Asynchronously populate the walker cache in the background.
	go w.populateCache()
	return w, nil
}
// shouldVisit reports whether the visit method should be called on rel.
//
// In the two VisitAll modes every directory is visited. Otherwise a directory
// is visited when it has an entry in shouldUpdateRel (it was requested by the
// caller or is an ancestor of a requested directory). In UpdateSubdirsMode,
// a directory is also visited when its parent is being updated, as indicated
// by updateParent.
func (w *walker) shouldVisit(rel string, updateParent bool) bool {
	if w.mode == VisitAllUpdateSubdirsMode || w.mode == VisitAllUpdateDirsMode {
		return true
	}
	// Presence in the map is what matters here, not the stored value.
	if _, listed := w.shouldUpdateRel[rel]; listed {
		return true
	}
	return w.mode == UpdateSubdirsMode && updateParent
}
// shouldUpdate reports whether Walk should pass true to the callback's update
// parameter in the directory rel, indicating the build file should be updated.
//
// In the modes that update subdirectories (VisitAllUpdateSubdirsMode and
// UpdateSubdirsMode), a directory is updated whenever its parent is, as
// indicated by updateParent. In every other case the answer is the value
// recorded for rel in shouldUpdateRel, which is false for directories that
// have no entry.
func (w *walker) shouldUpdate(rel string, updateParent bool) bool {
	if updateParent {
		switch w.mode {
		case VisitAllUpdateSubdirsMode, UpdateSubdirsMode:
			return true
		}
	}
	return w.shouldUpdateRel[rel]
}
// visit is the main recursive function of walker. It visits one directory,
// possibly recurses into subdirectories, and possibly calls the callback.
//
// c is the configuration to use for the directory. Unless the directory is
// contained by its parent, configure is applied to c. Subdirectories are
// visited with clones of c.
//
// rel is the slash-separated path to the directory, relative to the
// repository root ("" for the root itself).
//
// updateParent should indicate whether the current mode tells Gazelle
// to call the callback in the parent directory with update = true (see
// shouldUpdate). When loading the directory reported an error, or when the
// walk configuration's ignore flag is set, the callback is still called but
// with Update = false.
//
// Errors from loading the directory and from the callback are appended to
// w.errs. When c.Strict is set, visit returns as soon as a subdirectory
// visit leaves an error in w.errs, without calling the callback.
func (w *walker) visit(c *config.Config, rel string, updateParent bool) {
// Absolute path to the directory being visited
dir := filepath.Join(c.RepoRoot, rel)
// Load the build file and directory metadata.
info, err := w.cache.get(rel, w.loadDirInfo)
if err != nil {
// Record the error but keep going with whatever info was returned;
// the callback below will be told not to update this directory.
w.errs = append(w.errs, err)
}
hasBuildFileError := err != nil
wc := info.config
if wc.isExcludedDir(rel) {
// Excluded directories are not recorded in w.visits and the callback
// is not called for them.
return
}
// A directory without a build file in update-only mode belongs to its
// parent: it is not configured and the callback is not called for it.
containedByParent := info.File == nil && wc.updateOnly
// Configure the directory, if we haven't done so already.
_, alreadyConfigured := w.visits[rel]
if !containedByParent && !alreadyConfigured {
configure(w.cexts, w.knownDirectives, c, rel, info.File, info.config)
}
regularFiles := info.RegularFiles
subdirs := info.Subdirs
shouldUpdate := w.shouldUpdate(rel, updateParent)
// Record the visit before recursing so that subdirectories (and the
// collect closure below) can look up this directory's information.
w.visits[rel] = visitInfo{
c: c,
containedByParent: containedByParent,
regularFiles: regularFiles,
subdirs: subdirs,
}
// Visit subdirectories, as needed.
for _, subdir := range subdirs {
subdirRel := path.Join(rel, subdir)
if w.shouldVisit(subdirRel, shouldUpdate) {
w.visit(c.Clone(), subdirRel, shouldUpdate)
if c.Strict && len(w.errs) > 0 {
return
}
}
}
// Recursively collect regular files from subdirectories that won't contain
// build files. Files are added in depth-first pre-order.
if !containedByParent {
var collect func(string, string)
// collect appends the files and subdirectories of the directory rel to
// regularFiles and subdirs, with names prefixed by prefix (the path of
// rel relative to the directory being visited). It does nothing for
// directories that were not visited or are not contained by their
// parent, since the zero visitInfo has containedByParent == false.
collect = func(rel, prefix string) {
vi := w.visits[rel]
if !vi.containedByParent {
return
}
for _, f := range vi.regularFiles {
regularFiles = append(regularFiles, path.Join(prefix, f))
}
for _, f := range vi.subdirs {
subdirs = append(subdirs, path.Join(prefix, f))
}
for _, subdir := range vi.subdirs {
collect(path.Join(rel, subdir), path.Join(prefix, subdir))
}
}
// The range expression is evaluated once, so this loop only covers the
// direct subdirectories; entries appended to subdirs by collect are
// handled by collect's own recursion.
for _, subdir := range subdirs {
collect(path.Join(rel, subdir), subdir)
}
// Call the callback to update this directory.
update := !wc.ignore && shouldUpdate && !hasBuildFileError
result := w.wf(Walk2FuncArgs{
Dir: dir,
Rel: rel,
Config: c,
Update: update,
File: info.File,
Subdirs: subdirs,
RegularFiles: regularFiles,
GenFiles: info.GenFiles,
})
if result.Err != nil {
w.errs = append(w.errs, result.Err)
}
// Queue additional directories requested by the callback, skipping any
// path that has already been queued.
for _, relToVisit := range result.RelsToVisit {
// Normalize RelsToVisit to clean relative paths and convert root "."
// to an empty string.
relToVisit = path.Clean(relToVisit)
if relToVisit == "." {
relToVisit = ""
}
if _, ok := w.relsToVisitSeen[relToVisit]; !ok {
w.relsToVisit = append(w.relsToVisit, relToVisit)
w.relsToVisitSeen[relToVisit] = struct{}{}
}
}
}
}
// loadBuildFile finds and loads the build file for the package pkg.
//
// By default the build file is looked up among ents, the entries of dir.
// If readBuildFilesDir is non-empty, the build file is instead looked up in
// the directory for pkg under readBuildFilesDir, whose entries are read from
// disk.
//
// wc supplies the list of valid build file names. loadBuildFile returns
// (nil, nil) if no build file is found, and an error if the alternate
// directory cannot be read or the build file cannot be loaded.
func loadBuildFile(wc *walkConfig, readBuildFilesDir string, pkg, dir string, ents []fs.DirEntry) (*rule.File, error) {
	searchDir, searchEnts := dir, ents
	if readBuildFilesDir != "" {
		var err error
		searchDir = filepath.Join(readBuildFilesDir, filepath.FromSlash(pkg))
		if searchEnts, err = os.ReadDir(searchDir); err != nil {
			return nil, err
		}
	}

	buildFilePath := rule.MatchBuildFile(searchDir, wc.validBuildFileNames, searchEnts)
	if buildFilePath == "" {
		// No build file in this directory; that's not an error.
		return nil, nil
	}
	return rule.LoadFile(buildFilePath, pkg)
}
// configure applies the configuration for the directory rel to c.
//
// If f, the directory's build file, is non-nil, each of its directives is
// checked against knownDirectives; unknown directives are logged, and the
// process exits if c.Strict is set. The walk configuration wc is then stored
// in c.Exts, and Configure is called on every extension in cexts, in order.
func configure(cexts []config.Configurer, knownDirectives map[string]bool, c *config.Config, rel string, f *rule.File, wc *walkConfig) {
	if f != nil {
		for _, directive := range f.Directives {
			if knownDirectives[directive.Key] {
				continue
			}
			log.Printf("%s: unknown directive: gazelle:%s", f.Path, directive.Key)
			if c.Strict {
				// TODO(https://github.com/bazelbuild/bazel-gazelle/issues/1029):
				// Refactor to accumulate and propagate errors to main.
				log.Fatal("Exit as strict mode is on")
			}
		}
	}

	// Install the walk configuration before the extensions run.
	c.Exts[walkNameCached] = wc
	for i := range cexts {
		cexts[i].Configure(c, rel, f)
	}
}
// findGenFiles returns the names of generated files declared in the build
// file f, found by reading the "out" and "outs" attributes of each rule.
// An attribute is read as a single string first and, if that yields nothing,
// as a list of strings. Names for which wc.isExcludedFile reports true (after
// joining them with the package path) are omitted.
//
// findGenFiles returns nil if f is nil.
func findGenFiles(wc *walkConfig, f *rule.File) []string {
	if f == nil {
		return nil
	}

	// First gather every declared output, in rule order.
	var candidates []string
	for _, r := range f.Rules {
		for _, attr := range [...]string{"out", "outs"} {
			if single := r.AttrString(attr); single != "" {
				candidates = append(candidates, single)
				continue
			}
			candidates = append(candidates, r.AttrStrings(attr)...)
		}
	}

	// Then drop the ones that are excluded.
	var genFiles []string
	for _, name := range candidates {
		if wc.isExcludedFile(path.Join(f.Pkg, name)) {
			continue
		}
		genFiles = append(genFiles, name)
	}
	return genFiles
}
// maybeResolveSymlink conditionally resolves a symbolic link.
//
// If ent is a symbolic link and Gazelle is configured to follow it (with
// # gazelle:follow), then maybeResolveSymlink resolves the link and returns it.
// The returned entry has the original name, but other metadata describes
// the target file or directory.
//
// Otherwise, maybeResolveSymlink returns ent as-is. This includes the case
// where the link cannot be resolved (for example, a dangling link).
//
// dir is the file system path of the directory containing ent. rel is the
// path passed to wc.shouldFollow to decide whether the link is followed.
func maybeResolveSymlink(wc *walkConfig, dir, rel string, ent fs.DirEntry) fs.DirEntry {
	if ent.Type()&os.ModeSymlink == 0 {
		// Not a symlink, use the original FileInfo.
		return ent
	}
	if !wc.shouldFollow(rel) {
		// A symlink, but not one we should follow.
		return ent
	}
	// dir is an OS file system path, so join with filepath rather than the
	// slash-only path package. os.Stat follows the link and reports the base
	// name of the path it was given, which is the link's own name.
	fi, err := os.Stat(filepath.Join(dir, ent.Name()))
	if err != nil {
		// A symlink, but not one we could resolve.
		return ent
	}
	return fs.FileInfoToDirEntry(fi)
}