pw_presubmit: Add submodule discovery code

Bug: b/244210619
Change-Id: I2cbbd2b29cb3edafbadbbd385b41845ee169176a
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/108850
Commit-Queue: Tim Laurence <timlaurence@google.com>
Reviewed-by: Rob Mohr <mohrr@google.com>
Reviewed-by: Wyatt Hepler <hepler@google.com>
diff --git a/pw_presubmit/py/BUILD.gn b/pw_presubmit/py/BUILD.gn
index 8ad3667..221fc15 100644
--- a/pw_presubmit/py/BUILD.gn
+++ b/pw_presubmit/py/BUILD.gn
@@ -40,6 +40,7 @@
     "pw_presubmit/tools.py",
   ]
   tests = [
+    "git_repo_test.py",
     "presubmit_test.py",
     "tools_test.py",
   ]
diff --git a/pw_presubmit/py/git_repo_test.py b/pw_presubmit/py/git_repo_test.py
new file mode 100755
index 0000000..bdabe25
--- /dev/null
+++ b/pw_presubmit/py/git_repo_test.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+# Copyright 2020 The Pigweed Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+"""git repo module tests"""
+
+from unittest import mock
+import re
+import pathlib
+import unittest
+
+from pw_presubmit import git_repo
+
+
+class TestGitRepo(unittest.TestCase):
+    """Tests for git_repo.py with git_stdout() and root() mocked out."""
+
+    GIT_ROOT = pathlib.Path("/dev/null/test")
+    SUBMODULES = [
+        pathlib.Path("third_party/pigweed"),
+        pathlib.Path("vendor/anycom/p1"),
+        pathlib.Path("vendor/anycom/p2")
+    ]
+    # Canned git_stdout() result: one submodule path per line.
+    GIT_SUBMODULES_OUT = "\n".join([str(x) for x in SUBMODULES])
+
+    def setUp(self) -> None:
+        super().setUp()
+        # Registered first so the patches are undone even if setUp fails later.
+        self.addCleanup(mock.patch.stopall)
+        self.git_stdout = mock.patch.object(git_repo,
+                                            "git_stdout",
+                                            autospec=True).start()
+        self.git_stdout.return_value = self.GIT_SUBMODULES_OUT
+        self.root = mock.patch.object(git_repo, "root", autospec=True).start()
+        self.root.return_value = self.GIT_ROOT
+
+    def test_mock_root(self):
+        """Ensure our mock works since so many of our tests depend upon it."""
+        self.assertEqual(git_repo.root(), self.GIT_ROOT)
+
+    def test_discover_submodules_1(self):
+        """The superproject itself is part of the result."""
+        paths = git_repo.discover_submodules(superproject_dir=self.GIT_ROOT)
+        self.assertIn(self.GIT_ROOT, paths)
+
+    def test_discover_submodules_2(self):
+        """Submodules reported by git are part of the result."""
+        paths = git_repo.discover_submodules(superproject_dir=self.GIT_ROOT)
+        self.assertIn(self.SUBMODULES[2], paths)
+
+    def test_discover_submodules_with_exclude_str(self):
+        """A string equal to a path's posix form excludes that path."""
+        paths = git_repo.discover_submodules(
+            superproject_dir=self.GIT_ROOT,
+            excluded_paths=(self.GIT_ROOT.as_posix(), ))
+        self.assertNotIn(self.GIT_ROOT, paths)
+
+    def test_discover_submodules_with_exclude_regex(self):
+        """A pattern that matches a whole path excludes that path."""
+        paths = git_repo.discover_submodules(
+            superproject_dir=self.GIT_ROOT,
+            excluded_paths=(re.compile("third_party/.*"), ))
+        self.assertNotIn(self.SUBMODULES[0], paths)
+
+    def test_discover_submodules_with_exclude_str_miss(self):
+        """A string that is only part of a path excludes nothing."""
+        paths = git_repo.discover_submodules(
+            superproject_dir=self.GIT_ROOT, excluded_paths=("pigweed", ))
+        self.assertCountEqual(paths, [self.GIT_ROOT, *self.SUBMODULES])
+
+    def test_discover_submodules_with_exclude_regex_miss_1(self):
+        """A pattern that matches no path excludes nothing."""
+        paths = git_repo.discover_submodules(
+            superproject_dir=self.GIT_ROOT,
+            excluded_paths=(re.compile("foo/.*"), ))
+        self.assertCountEqual(paths, [self.GIT_ROOT, *self.SUBMODULES])
+
+    def test_discover_submodules_with_exclude_regex_miss_2(self):
+        """A pattern that matches only part of a path excludes nothing."""
+        paths = git_repo.discover_submodules(
+            superproject_dir=self.GIT_ROOT,
+            excluded_paths=(re.compile("pigweed"), ))
+        self.assertCountEqual(paths, [self.GIT_ROOT, *self.SUBMODULES])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/pw_presubmit/py/pw_presubmit/cli.py b/pw_presubmit/py/pw_presubmit/cli.py
index bda9a7b..34f4ec4 100644
--- a/pw_presubmit/py/pw_presubmit/cli.py
+++ b/pw_presubmit/py/pw_presubmit/cli.py
@@ -203,7 +203,7 @@
       **other_args: remaining arguments defined by by add_arguments
 
     Returns:
-      exit code for sys.exit; 0 if succesful, 1 if an error occurred
+      exit code for sys.exit; 0 if successful, 1 if an error occurred
     """
     if root is None:
         root = git_repo.root()
diff --git a/pw_presubmit/py/pw_presubmit/format_code.py b/pw_presubmit/py/pw_presubmit/format_code.py
index 115ce1e..55db336 100755
--- a/pw_presubmit/py/pw_presubmit/format_code.py
+++ b/pw_presubmit/py/pw_presubmit/format_code.py
@@ -257,7 +257,6 @@
     if not errors:
         # Don't print anything in the all-good case.
         return
-
     # Show the format fixing diff suggested by the tooling (with colors).
     _LOG.warning('Found %d files with formatting errors. Format changes:',
                  len(errors))
@@ -267,7 +266,7 @@
     # Show a copy-and-pastable command to fix the issues.
     if show_fix_commands:
 
-        def path_relative_to_cwd(path):
+        def path_relative_to_cwd(path: Path):
             try:
                 return Path(path).resolve().relative_to(Path.cwd().resolve())
             except ValueError:
diff --git a/pw_presubmit/py/pw_presubmit/git_repo.py b/pw_presubmit/py/pw_presubmit/git_repo.py
index 2ce15a5..59b1c70 100644
--- a/pw_presubmit/py/pw_presubmit/git_repo.py
+++ b/pw_presubmit/py/pw_presubmit/git_repo.py
@@ -23,6 +23,7 @@
 
 _LOG = logging.getLogger(__name__)
 PathOrStr = Union[Path, str]
+PatternOrStr = Union[Pattern, str]
 
 TRACKING_BRANCH_ALIAS = '@{upstream}'
 _TRACKING_BRANCH_ALIASES = TRACKING_BRANCH_ALIAS, '@{u}'
@@ -31,7 +32,7 @@
 def git_stdout(*args: PathOrStr,
                show_stderr=False,
                repo: PathOrStr = '.') -> str:
-    return log_run(['git', '-C', repo, *args],
+    return log_run(['git', '-C', str(repo), *args],
                    stdout=subprocess.PIPE,
                    stderr=None if show_stderr else subprocess.DEVNULL,
                    check=True).stdout.decode().strip()
@@ -332,3 +333,42 @@
         args += ['--short']
     args += [rev]
     return git_stdout(*args, repo=repo)
+
+
+def discover_submodules(
+    superproject_dir: Path, excluded_paths: Collection[PatternOrStr] = ()
+) -> List[Path]:
+    """Query git and return a list of submodules in the current project.
+
+    Args:
+        superproject_dir: Path object to directory under which we are looking
+                          for submodules. This will also be included in list
+                          returned unless excluded.
+        excluded_paths: Patterns or strings naming paths to leave out. A
+                        Pattern must fullmatch the posix style path; a string
+                        must equal it exactly.
+
+    Returns:
+        List of "Path"s which were found but not excluded, this includes
+        superproject_dir unless excluded. Submodules keep the order git
+        reported them in, and superproject_dir comes last.
+    """
+    discovery_report = git_stdout('submodule',
+                                  'foreach',
+                                  '--quiet',
+                                  '--recursive',
+                                  'echo $sm_path',
+                                  repo=superproject_dir)
+    # One path per line; split() would break up paths containing spaces.
+    report_lines = discovery_report.splitlines()
+    module_dirs = [Path(line) for line in report_lines if line]
+    # The superproject is omitted in the prior scan.
+    module_dirs.append(superproject_dir)
+
+    def is_excluded(posix_path: str) -> bool:
+        return any(
+            exclude.fullmatch(posix_path) if isinstance(exclude, Pattern)
+            else exclude == posix_path for exclude in excluded_paths)
+
+    # Build a new list rather than removing entries while iterating.
+    return [path for path in module_dirs if not is_excluded(path.as_posix())]