pw_presubmit: Add banned words check
Add check for non-inclusive language.
Change-Id: Iea485b4e2a40f9db56ef84b97930848eeb9114cc
Bug: 386
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/46360
Pigweed-Auto-Submit: Rob Mohr <mohrr@google.com>
Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>
Reviewed-by: Keir Mierle <keir@google.com>
diff --git a/pw_presubmit/docs.rst b/pw_presubmit/docs.rst
index 2173f6e..e53238e 100644
--- a/pw_presubmit/docs.rst
+++ b/pw_presubmit/docs.rst
@@ -127,8 +127,9 @@
sys.exit(2)
import pw_presubmit
- from pw_presubmit import build, cli, environment, format_code, git_repo
- from pw_presubmit import python_checks, filter_paths, PresubmitContext
+ from pw_presubmit import banned_words, build, cli, environment, format_code
+ from pw_presubmit import git_repo, python_checks, filter_paths
+ from pw_presubmit import PresubmitContext
from pw_presubmit.install_hook import install_hook
# Set up variables for key project paths.
@@ -184,14 +185,13 @@
# Presubmit check programs
#
QUICK = (
- # Initialize an environment for running presubmit checks.
- init_cipd,
- init_virtualenv,
# List some presubmit checks to run
pragma_once,
host_tests,
# Use the upstream formatting checks, with custom path filters applied.
format_code.presubmit_checks(exclude=PATH_EXCLUSIONS),
+ # Include the upstream inclusive language check.
+ banned_words.banned_words,
)
FULL = (
@@ -238,4 +238,4 @@
---------------------
The ``pw_presubmit.format_code`` module formats supported source files using
external code format tools. The file ``format_code.py`` can be invoked directly
-from the command line or from ``pw`` as ``pw format``.
\ No newline at end of file
+from the command line or from ``pw`` as ``pw format``.
diff --git a/pw_presubmit/py/BUILD.gn b/pw_presubmit/py/BUILD.gn
index cfaa150..444ab0f 100644
--- a/pw_presubmit/py/BUILD.gn
+++ b/pw_presubmit/py/BUILD.gn
@@ -20,6 +20,7 @@
setup = [ "setup.py" ]
sources = [
"pw_presubmit/__init__.py",
+ "pw_presubmit/banned_words.py",
"pw_presubmit/build.py",
"pw_presubmit/cli.py",
"pw_presubmit/environment.py",
diff --git a/pw_presubmit/py/pw_presubmit/banned_words.py b/pw_presubmit/py/pw_presubmit/banned_words.py
new file mode 100644
index 0000000..eae763b
--- /dev/null
+++ b/pw_presubmit/py/pw_presubmit/banned_words.py
@@ -0,0 +1,141 @@
+# Copyright 2021 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+"""Banned words presubmit check."""
+
+import collections
+from pathlib import Path
+import re
+from typing import Dict, List
+
+from . import presubmit
+
+# List borrowed from Android:
+# https://source.android.com/setup/contribute/respectful-code
+# banned-words: disable
+# Each entry is a regular-expression fragment, not a literal word.
+# _process_banned_words() joins the entries into a single case-insensitive
+# alternation anchored at a word boundary and also accepts a trailing "s",
+# "d", "es" or "ed", so simple plurals and past tenses need not be repeated.
+BANNED_WORDS = [
+ r'master',
+ r'slave',
+ r'(white|gr[ae]y|black)\s*(list|hat)',
+ r'craz(y|ie)',
+ r'insane',
+ r'crip+led?',
+ r'sanity',
+ r'sane',
+ r'dummy',
+ r'grandfather',
+ r's?he',
+ r'his',
+ r'her',
+ r'm[ae]n[-\s]*in[-\s]*the[-\s]*middle',
+ r'mitm',
+]
+# banned-words: enable
+
+# Test: master # banned-words: ignore
+# Test: master
+
+
+def _process_banned_words(*words):
+    """Turn banned-words list into one big regex with common inflections.
+
+    Args:
+        *words: Regex fragments to ban. Each argument is either a single
+            string or a list/tuple of strings; sequences are flattened one
+            level. With no arguments, BANNED_WORDS is used.
+
+    Returns:
+        A compiled, case-insensitive pattern that matches any fragment at a
+        word boundary, optionally followed by "s", "d", "es" or "ed".
+
+    Raises:
+        TypeError: An argument or a sequence item is not a string.
+        re.error: A fragment is not a valid regular expression.
+    """
+
+    if not words:
+        words = tuple(BANNED_WORDS)
+
+    # Flatten exactly one level so every fragment is added once. The raw
+    # arguments must not be appended again: that would duplicate fragments
+    # and would put whole lists/tuples into the alternation.
+    all_words = []
+    for entry in words:
+        if isinstance(entry, str):
+            all_words.append(entry)
+        elif isinstance(entry, (list, tuple)):
+            all_words.extend(entry)
+        else:
+            raise TypeError(
+                'banned word entries must be str, list, or tuple, not '
+                f'{type(entry).__name__}')
+
+    # Confirm each individual word compiles as a valid regex, so a broken
+    # fragment is reported alone and not as part of the combined pattern.
+    for word in all_words:
+        if not isinstance(word, str):
+            raise TypeError(
+                f'banned word must be str, not {type(word).__name__}')
+        re.compile(word)
+
+    return re.compile(
+        r"\b({})(\b|e?[sd]\b)".format('|'.join(all_words)),
+        re.IGNORECASE,
+    )
+
+
+# Default pattern, built once at import time from BANNED_WORDS (no arguments
+# are passed). banned_words() uses it unless another pattern is supplied.
+BANNED_WORDS_REGEX = _process_banned_words()
+
+# If seen, ignore this line and the next.
+_IGNORE = 'banned-words: ignore'
+
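+# Ignore a whole section. Please do not change the order of these lines: each
+# constant's own source line contains its marker text, so if this file is
+# scanned, the _DISABLE line must be followed by the _ENABLE line for checking
+# to resume.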
+_DISABLE = 'banned-words: disable'
+_ENABLE = 'banned-words: enable'
+
+
+def banned_words(
+    ctx: presubmit.PresubmitContext,
+    words_regex=BANNED_WORDS_REGEX,
+):
+    """Presubmit check that fails when a file contains a banned word.
+
+    Every path in ctx.paths is opened as text and read line by line. A line
+    is skipped when it, or the line right before it, contains the _IGNORE
+    marker, or when checking is switched off. A line containing _DISABLE
+    switches checking off (and is itself skipped); a line containing _ENABLE
+    switches it back on (and is checked again). Otherwise the first match of
+    words_regex on the line is recorded.
+
+    A file that raises UnicodeDecodeError while being read is treated as
+    non-text; matches recorded before the error are kept.
+
+    Args:
+        ctx: Presubmit context; only ctx.paths is used.
+        words_regex: Compiled pattern used to search each line.
+
+    Raises:
+        presubmit.PresubmitFailure: At least one match was recorded. The
+            matches are printed, grouped by file, before raising.
+    """
+
+    Match = collections.namedtuple('Match', 'line word')
+    found_words: Dict[Path, List[Match]] = {}
+
+    for path in ctx.paths:
+        try:
+            with open(path, 'r') as ins:
+                checking = True
+                previous = ''
+                for number, text in enumerate(ins, start=1):
+                    # _ENABLE wins when both markers share a line.
+                    if _ENABLE in text:
+                        checking = True
+                    elif _DISABLE in text:
+                        checking = False
+
+                    suppressed = _IGNORE in text or _IGNORE in previous
+                    # Remember this line before any early exit below.
+                    previous = text
+
+                    if suppressed or not checking:
+                        continue
+
+                    hit = words_regex.search(text)
+                    if hit:
+                        found_words.setdefault(path, []).append(
+                            Match(number, hit.group(0)))
+
+        except UnicodeDecodeError:
+            # File is not text, like a gif.
+            pass
+
+    for path, hits in found_words.items():
+        print('=' * 40)
+        print(path)
+        for found in hits:
+            print(f'Found banned word "{found.word}" on line {found.line}')
+
+    if found_words:
+        raise presubmit.PresubmitFailure
+
+
+def banned_words_checker(*words):
+ """Create banned words checker for the given list of banned words.
+
+ Args:
+     *words: Forwarded unchanged to _process_banned_words() to build the
+         pattern.
+
+ Returns:
+     A function named banned_words that takes only a presubmit context and
+     runs the module-level banned_words check with the custom pattern.
+ """
+
+ regex = _process_banned_words(*words)
+
+ # The nested function reuses the name banned_words, which makes that name
+ # local to this factory. The module-level check is therefore looked up
+ # through globals() and not by bare name.
+ def banned_words( # pylint: disable=redefined-outer-name
+ ctx: presubmit.PresubmitContext):
+ globals()['banned_words'](ctx, regex)
+
+ return banned_words
diff --git a/pw_presubmit/py/pw_presubmit/pigweed_presubmit.py b/pw_presubmit/py/pw_presubmit/pigweed_presubmit.py
index 4e6676b..d2e0040 100755
--- a/pw_presubmit/py/pw_presubmit/pigweed_presubmit.py
+++ b/pw_presubmit/py/pw_presubmit/pigweed_presubmit.py
@@ -35,7 +35,7 @@
import pw_package.pigweed_packages
-from pw_presubmit import build, cli, format_code, git_repo
+from pw_presubmit import banned_words, build, cli, format_code, git_repo
from pw_presubmit import call, filter_paths, plural, PresubmitContext
from pw_presubmit import PresubmitFailure, Programs
from pw_presubmit.install_hook import install_hook
@@ -621,6 +621,7 @@
#
OTHER_CHECKS = (
+ banned_words.banned_words,
# TODO(pwbug/45): Remove clang-tidy from OTHER_CHECKS when it passes.
clang_tidy,
# Build that attempts to duplicate the build OSS-Fuzz does. Currently