# Copyright 2021 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Banned words presubmit check."""
import collections
from pathlib import Path
import re
from typing import Dict, List
from . import presubmit
# List borrowed from Android:
# https://source.android.com/setup/contribute/respectful-code
# banned-words: disable
BANNED_WORDS = [
    r'master',
    r'slave',
    r'(white|gr[ae]y|black)\s*(list|hat)',
    r'craz(y|ie)',
    r'insane',
    r'crip+led?',
    r'sanity',
    r'sane',
    r'dummy',
    r'grandfather',
    r's?he',
    r'his',
    r'her',
    r'm[ae]n[-\s]*in[-\s]*the[-\s]*middle',
    r'mitm',
]
# banned-words: enable
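# The two test lines below deliberately contain a banned word; the 'ignore'
# marker on the first line suppresses matches on that line and on the line
# that follows it (see _IGNORE below).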
# Test: master # banned-words: ignore
# Test: master


def _process_banned_words(*words):
"""Turn banned-words list into one big regex with common inflections."""
if not words:
words = tuple(BANNED_WORDS)
all_words = []
for entry in words:
if isinstance(entry, str):
all_words.append(entry)
elif isinstance(entry, (list, tuple)):
all_words.extend(entry)
all_words.extend(x for x in words)
all_words = tuple(all_words)
# Confirm each individual word compiles as a valid regex.
for word in all_words:
_ = re.compile(word)
return re.compile(
r"\b({})(\b|e?[sd]\b)".format('|'.join(all_words)),
re.IGNORECASE,
)
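# For illustration: _process_banned_words('foo') compiles a pattern equivalent
# to r"\b(foo)(\b|e?[sd]\b)" with re.IGNORECASE, so 'foo', 'Foo', 'foos', and
# 'fooed' all match ('foo' is a stand-in, not a real entry). BANNED_WORDS_REGEX
# below applies the same construction to the full BANNED_WORDS list.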
BANNED_WORDS_REGEX = _process_banned_words()

# If seen, ignore this line and the next.
_IGNORE = 'banned-words: ignore'

# Ignore a whole section. Please do not change the order of these lines.
_DISABLE = 'banned-words: disable'
_ENABLE = 'banned-words: enable'


def banned_words(
    ctx: presubmit.PresubmitContext,
    words_regex=BANNED_WORDS_REGEX,
):
    """Presubmit check that ensures files do not contain banned words."""
    Match = collections.namedtuple('Match', 'line word')
    found_words: Dict[Path, List[Match]] = {}

    for path in ctx.paths:
        try:
            with open(path, 'r') as ins:
                enabled = True
                prev = ''
                for i, line in enumerate(ins, start=1):
                    if _DISABLE in line:
                        enabled = False
                    if _ENABLE in line:
                        enabled = True

                    # If we see the ignore line on this or the previous line
                    # we ignore any bad words on this line.
                    ignored = _IGNORE in prev or _IGNORE in line

                    if enabled and not ignored:
                        match = words_regex.search(line)
                        if match:
                            found_words.setdefault(path, [])
                            found_words[path].append(Match(i, match.group(0)))

                    # Not using 'continue' so this line always executes.
                    prev = line

        except UnicodeDecodeError:
            # File is not text, like a gif.
            pass

    for path, matches in found_words.items():
        print('=' * 40)
        print(path)
        for match in matches:
            print(f'Found banned word "{match.word}" on line {match.line}')

    if found_words:
        raise presubmit.PresubmitFailure


def banned_words_checker(*words):
"""Create banned words checker for the given list of banned words."""
regex = _process_banned_words(*words)
def banned_words( # pylint: disable=redefined-outer-name
ctx: presubmit.PresubmitContext):
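        # The local name shadows the module-level banned_words() above, so
        # look it up through globals() and run it with the customized regex.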
        globals()['banned_words'](ctx, regex)

    return banned_words
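# A minimal usage sketch (the names and extra words below are illustrative,
# not part of this module): a downstream presubmit program could build its own
# check with additional words, e.g.
#
#   extra_words_check = banned_words_checker(*BANNED_WORDS, r'foo', r'bar')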