Enforce that files in the `src` directory are referenced from BUILD.gn (#31960)

* Start creating a script

* Have much more functionality

* Restyle

* Add some doc comments ... this starts being usable

* Add workflow to validate that gn knows about files

* Remove controller from known exceptions: we fixed that one

* Fix flake8

* Add more known failures

* Better error reporting for gn reachability

* Remove the platform specific orphan file listing

* Force the "not failures anymore" to be fatal

---------

Co-authored-by: Andrei Litvin <andreilitvin@google.com>
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 29a3dc8..2924edf 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -42,6 +42,166 @@
               with:
                 platform: linux
 
+            - name: Check for orphaned gn files
+              if: always()
+              # We should enforce that ALL new files are referenced in our build scripts.
+              # Several things do not have a clear fix path:
+              #   - various platform implementations (including darwin-specific files as they
+              #     are not using GN)
+              #   - app/clusters (they are fetched dynamically - this should probably be fixed)
+              #
+              # All the rest of the exceptions should be driven down to 0: chip should fully
+              # be defined in build rules.
+              #
+              # This check enforces that for any newly added file, it must be part of some
+              # BUILD.gn file
+              run: |
+                  ./scripts/run_in_build_env.sh "./scripts/tools/not_known_to_gn.py \
+                     src \
+                     --skip-dir app/clusters \
+                     --skip-dir darwin \
+                     --skip-dir include \
+                     --skip-dir platform/Ameba \
+                     --skip-dir platform/android \
+                     --skip-dir platform/ASR \
+                     --skip-dir platform/Beken \
+                     --skip-dir platform/bouffalolab \
+                     --skip-dir platform/cc13xx_26xx \
+                     --skip-dir platform/cc32xx \
+                     --skip-dir platform/Darwin \
+                     --skip-dir platform/ESP32 \
+                     --skip-dir platform/fake \
+                     --skip-dir platform/FreeRTOS \
+                     --skip-dir platform/Infineon \
+                     --skip-dir platform/Linux \
+                     --skip-dir platform/mbed \
+                     --skip-dir platform/mt793x \
+                     --skip-dir platform/nxp \
+                     --skip-dir platform/OpenThread \
+                     --skip-dir platform/qpg \
+                     --skip-dir platform/silabs \
+                     --skip-dir platform/telink \
+                     --skip-dir platform/webos \
+                     --skip-dir platform/Zephyr \
+                     --skip-dir test_driver \
+                     --known-failure app/app-platform/ContentApp.cpp \
+                     --known-failure app/app-platform/ContentApp.h \
+                     --known-failure app/app-platform/ContentAppPlatform.cpp \
+                     --known-failure app/app-platform/ContentAppPlatform.h \
+                     --known-failure controller/ExamplePersistentStorage.cpp \
+                     --known-failure controller/ExamplePersistentStorage.h \
+                     --known-failure controller/java/GroupDeviceProxy.h \
+                     --known-failure controller/java/CHIPEventTLVValueDecoder.h \
+                     --known-failure controller/python/chip/credentials/cert.h \
+                     --known-failure controller/python/chip/server/Options.h \
+                     --known-failure controller/python/chip/crypto/p256keypair.h \
+                     --known-failure controller/python/chip/commissioning/PlaceholderOperationalCredentialsIssuer.h \
+                     --known-failure controller/python/chip/native/PyChipError.h \
+                     --known-failure app/AttributeAccessInterface.h \
+                     --known-failure app/AttributeAccessToken.h \
+                     --known-failure app/att-storage.h \
+                     --known-failure app/BufferedReadCallback.h \
+                     --known-failure app/CommandHandler.h \
+                     --known-failure app/CommandHandlerInterface.h \
+                     --known-failure app/CommandPathParams.h \
+                     --known-failure app/CommandPathRegistry.h \
+                     --known-failure app/CommandResponseSender.h \
+                     --known-failure app/CommandSender.h \
+                     --known-failure app/CommandSenderLegacyCallback.h \
+                     --known-failure app/CompatEnumNames.h \
+                     --known-failure app/ConcreteAttributePath.h \
+                     --known-failure app/ConcreteCommandPath.h \
+                     --known-failure app/data-model/ListLargeSystemExtensions.h \
+                     --known-failure app/EventHeader.h \
+                     --known-failure app/EventLoggingDelegate.h \
+                     --known-failure app/EventLogging.h \
+                     --known-failure app/EventLoggingTypes.h \
+                     --known-failure app/EventManagement.h \
+                     --known-failure app/InteractionModelHelper.h \
+                     --known-failure app/MessageDef/ArrayBuilder.h \
+                     --known-failure app/MessageDef/ArrayParser.h \
+                     --known-failure app/MessageDef/CommandDataIB.h \
+                     --known-failure app/MessageDef/CommandPathIB.h \
+                     --known-failure app/MessageDef/CommandStatusIB.h \
+                     --known-failure app/MessageDef/EventFilterIB.h \
+                     --known-failure app/MessageDef/EventFilterIBs.h \
+                     --known-failure app/MessageDef/InvokeRequestMessage.h \
+                     --known-failure app/MessageDef/InvokeRequests.h \
+                     --known-failure app/MessageDef/InvokeResponseIB.h \
+                     --known-failure app/MessageDef/InvokeResponseIBs.h \
+                     --known-failure app/MessageDef/InvokeResponseMessage.h \
+                     --known-failure app/MessageDef/ListBuilder.h \
+                     --known-failure app/MessageDef/ListParser.h \
+                     --known-failure app/MessageDef/StatusResponseMessage.h \
+                     --known-failure app/MessageDef/StructBuilder.h \
+                     --known-failure app/MessageDef/StructParser.h \
+                     --known-failure app/MessageDef/SubscribeRequestMessage.h \
+                     --known-failure app/MessageDef/SubscribeResponseMessage.h \
+                     --known-failure app/MessageDef/TimedRequestMessage.h \
+                     --known-failure app/MessageDef/WriteRequestMessage.h \
+                     --known-failure app/MessageDef/WriteResponseMessage.h \
+                     --known-failure app/ObjectList.h \
+                     --known-failure app/ReadClient.h \
+                     --known-failure app/ReadHandler.h \
+                     --known-failure app/ReadPrepareParams.h \
+                     --known-failure app/reporting/tests/MockReportScheduler.cpp \
+                     --known-failure app/reporting/tests/MockReportScheduler.h \
+                     --known-failure app/server/AppDelegate.h \
+                     --known-failure app/TestEventTriggerDelegate.h \
+                     --known-failure app/tests/integration/common.h \
+                     --known-failure app/tests/integration/MockEvents.h \
+                     --known-failure app/tests/suites/credentials/TestHarnessDACProvider.h \
+                     --known-failure app/tests/TestOperationalDeviceProxy.cpp \
+                     --known-failure app/util/af-enums.h \
+                     --known-failure app/util/af.h \
+                     --known-failure app/util/af-types.h \
+                     --known-failure app/util/attribute-metadata.h \
+                     --known-failure app/util/attribute-storage.cpp \
+                     --known-failure app/util/attribute-storage.h \
+                     --known-failure app/util/attribute-storage-null-handling.h \
+                     --known-failure app/util/attribute-table.cpp \
+                     --known-failure app/util/attribute-table.h \
+                     --known-failure app/util/binding-table.cpp \
+                     --known-failure app/util/binding-table.h \
+                     --known-failure app/util/common.h \
+                     --known-failure app/util/config.h \
+                     --known-failure app/util/DataModelHandler.cpp \
+                     --known-failure app/util/DataModelHandler.h \
+                     --known-failure app/util/ember-compatibility-functions.cpp \
+                     --known-failure app/util/endpoint-config-api.h \
+                     --known-failure app/util/endpoint-config-defines.h \
+                     --known-failure app/util/error-mapping.h \
+                     --known-failure app/util/generic-callbacks.h \
+                     --known-failure app/util/generic-callback-stubs.cpp \
+                     --known-failure app/util/im-client-callbacks.h \
+                     --known-failure app/util/MatterCallbacks.h \
+                     --known-failure app/util/message.cpp \
+                     --known-failure app/util/mock/Constants.h \
+                     --known-failure app/util/mock/Functions.h \
+                     --known-failure app/util/mock/MockNodeConfig.h \
+                     --known-failure app/util/odd-sized-integers.h \
+                     --known-failure app/util/types_stub.h \
+                     --known-failure app/util/util.cpp \
+                     --known-failure app/util/util.h \
+                     --known-failure app/WriteClient.h \
+                     --known-failure app/WriteHandler.h \
+                     --known-failure inet/tests/TestInetLayerCommon.hpp \
+                     --known-failure lib/core/CHIPVendorIdentifiers.hpp \
+                     --known-failure lib/dnssd/Constants.h \
+                     --known-failure lib/dnssd/minimal_mdns/core/FlatAllocatedQName.h \
+                     --known-failure lib/dnssd/minimal_mdns/core/HeapQName.h \
+                     --known-failure lib/dnssd/minimal_mdns/ListenIterator.h \
+                     --known-failure lib/dnssd/minimal_mdns/tests/CheckOnlyServer.h \
+                     --known-failure lib/dnssd/platform/DnssdBrowseDelegate.h \
+                     --known-failure lib/support/CHIPArgParser.hpp \
+                     --known-failure messaging/tests/echo/common.h \
+                     --known-failure platform/DeviceSafeQueue.cpp \
+                     --known-failure platform/DeviceSafeQueue.h \
+                     --known-failure platform/GLibTypeDeleter.h \
+                     --known-failure platform/SingletonConfigurationManager.cpp \
+                     --known-failure transport/retransmit/tests/TestCacheDriver.cpp \
+                  "
+
             - name: Check for matter lint errors
               if: always()
               run: |
diff --git a/scripts/tools/not_known_to_gn.py b/scripts/tools/not_known_to_gn.py
new file mode 100755
index 0000000..613f8a2
--- /dev/null
+++ b/scripts/tools/not_known_to_gn.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2024 Project CHIP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""
+Lists files specific files from a source tree and ensures
+they are covered by GN in some way.
+
+'Covered' is very loosely and it just tries to see if the GN text
+contains that word without trying to validate if this is a
+comment or some actual 'source' element.
+
+It is intended as a failsafe to not foget adding source files
+to gn.
+"""
+import logging
+import os
+import sys
+from pathlib import Path, PurePath
+from typing import Dict, Set
+
+import click
+import coloredlogs
+
+__LOG_LEVELS__ = {
+    'debug': logging.DEBUG,
+    'info': logging.INFO,
+    'warn': logging.WARN,
+    'fatal': logging.FATAL,
+}
+
+
+class OrphanChecker:
+    def __init__(self):
+        self.gn_data: Dict[str, str] = {}
+        self.known_failures: Set[str] = set()
+        self.fatal_failures = 0
+        self.failures = 0
+        self.found_failures: Set[str] = set()
+
+    def AppendGnData(self, gn: PurePath):
+        """Adds a GN file to the list of internally known GN data.
+
+        Will read the entire content of the GN file in memory for future reference.
+        """
+        logging.debug(f'Adding GN {gn!s} for {gn.parent!s}')
+        self.gn_data[str(gn.parent)] = gn.read_text('utf-8')
+
+    def AddKnownFailure(self, k: str):
+        self.known_failures.add(k)
+
+    def _IsKnownFailure(self, path: str) -> bool:
+        """check if failing on the given path is a known/acceptable failure"""
+        for k in self.known_failures:
+            if path == k or path.endswith(os.path.sep + k):
+                # mark some found failures to report if something is supposed
+                # to be known but it is not
+                self.found_failures.add(k)
+                return True
+        return False
+
+    def Check(self, top_dir: str, file: PurePath):
+        """
+        Validates that the given path is somehow referenced in GN files in any
+        of the parent sub-directories of the file.
+
+        `file` must be relative to `top_dir`. Top_dir is used to resolve relative
+        paths in error reports and known failure checks.
+        """
+        # Check logic:
+        #   - ensure the file name is included in some GN file inside this or
+        #     upper directory (although upper directory is not ideal)
+        for p in file.parents:
+            data = self.gn_data.get(str(p), None)
+            if not data:
+                continue
+
+            if file.name in data:
+                logging.debug("%s found in BUILD.gn for %s", file, p)
+                return
+
+        path = str(file.relative_to(top_dir))
+        if not self._IsKnownFailure(path):
+            logging.error("UNKNOWN to gn: %s", path)
+            self.fatal_failures += 1
+        else:
+            logging.warning("UNKNOWN to gn: %s (known error)", path)
+
+        self.failures += 1
+
+
+@click.command()
+@click.option(
+    '--log-level',
+    default='INFO',
+    type=click.Choice(list(__LOG_LEVELS__.keys()), case_sensitive=False),
+    help='Determines the verbosity of script output',
+)
+@click.option(
+    '--extensions',
+    default=["cpp", "cc", "c", "h", "hpp"],
+    type=str, multiple=True,
+    help='What file extensions to consider',
+)
+@click.option(
+    '--known-failure',
+    type=str, multiple=True,
+    help='What paths are known to fail',
+)
+@click.option(
+    '--skip-dir',
+    type=str,
+    multiple=True,
+    help='Skip a specific sub-directory from checks',
+)
+@click.argument('dirs',
+                type=click.Path(exists=True, file_okay=False, resolve_path=True), nargs=-1)
+def main(log_level, extensions, dirs, known_failure, skip_dir):
+    coloredlogs.install(level=__LOG_LEVELS__[log_level],
+                        fmt='%(asctime)s %(levelname)-7s %(message)s')
+
+    if not dirs:
+        logging.error("Please provide at least one directory to scan")
+        sys.exit(1)
+
+    if not extensions:
+        logging.error("Need at  least one extension")
+        sys.exit(1)
+
+    checker = OrphanChecker()
+    for k in known_failure:
+        checker.AddKnownFailure(k)
+
+    # ensure all GN data is loaded
+    for directory in dirs:
+        for name in Path(directory).rglob("BUILD.gn"):
+            checker.AppendGnData(name)
+
+    skip_dir = set(skip_dir)
+
+    # Go through all files and check for orphaned (if any)
+    extensions = set(extensions)
+    for directory in dirs:
+        for path, dirnames, filenames in os.walk(directory):
+            if any([s in path for s in skip_dir]):
+                continue
+            for f in filenames:
+                full_path = Path(os.path.join(path, f))
+                if not full_path.suffix or full_path.suffix[1:] not in extensions:
+                    continue
+                checker.Check(directory, full_path)
+
+    if checker.failures:
+        logging.warning("%d files not known to GN (%d fatal)", checker.failures, checker.fatal_failures)
+
+    if checker.known_failures != checker.found_failures:
+        not_failing = checker.known_failures - checker.found_failures
+        logging.warning("NOTE: %d failures are not found anymore:", len(not_failing))
+        for name in not_failing:
+            logging.warning("   - %s", name)
+        # Assume this is fatal - remove some of the "known-failing" should be easy.
+        # This forces scripts to always be correct and not accumulate bad input.
+        sys.exit(1)
+
+    if checker.fatal_failures > 0:
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main(auto_envvar_prefix='CHIP')