pw_tokenizer: Update committed CSVs in directory databases
- Add untracked and tracked CSV support
- Check whether CSVs are merged or unmerged in a commit
Change-Id: Ib530f3dda5f53d438cf262195a2ba64031a8b40a
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/103500
Commit-Queue: Carlos Chinchilla <cachinchilla@google.com>
Reviewed-by: Wyatt Hepler <hepler@google.com>
Reviewed-by: Leonardo Acosta <leoacosta@google.com>
diff --git a/pw_tokenizer/database.gni b/pw_tokenizer/database.gni
index 07d441f..f5cc21b 100644
--- a/pw_tokenizer/database.gni
+++ b/pw_tokenizer/database.gni
@@ -102,6 +102,12 @@
if (_create == "") {
args = [ "add" ]
+ if (defined(invoker.commit)) {
+ args += [
+ "--discard-temporary",
+ invoker.commit,
+ ]
+ }
inputs += [ _database ]
stamp = true
} else {
diff --git a/pw_tokenizer/docs.rst b/pw_tokenizer/docs.rst
index e81e8ed..ff326c6 100644
--- a/pw_tokenizer/docs.rst
+++ b/pw_tokenizer/docs.rst
@@ -780,14 +780,31 @@
A directory database is a collection of uniquely named CSV files. The
``add`` command creates a new CSV in the database when the supplied ELF or
database contains new token entries; the resulting CSV contains only the
new tokens.
-The filename is unique in the database folder and is named after a universally
-unique identifier (UUID). This is helpful when there are asynchronous additions
-to the database as merge conflicts are avoided.
+Each file in the database folder is named after a universally unique
+identifier (UUID), which avoids merge conflicts when additions to the
+database happen asynchronously.
.. code-block:: sh
./database.py add --database DIR_DATABASE_NAME ELF_OR_DATABASE_FILE
+The ``--discard-temporary`` argument accepts a git commit hash or reference
+and maintains a single CSV of the new tokens added since that commit. After
+the initial CSV is created with new tokens, each subsequent build updates
+that same CSV with the entries the build introduces. When no git repository
+exists or no commit is provided, the command behaves the same as a plain
+``add``.
+
+.. code-block:: sh
+
+   ./database.py add --discard-temporary COMMIT --database DIR_DATABASE_NAME \
+       ELF_OR_DATABASE_FILE
+
+When the directory database holds a CSV created after the provided commit,
+the ``add`` command with ``--discard-temporary`` retrieves that CSV, discards
+its entries that no longer appear in the current build, and then adds the
+new token entries from the current build.
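+
+For illustration, here is a minimal sketch of the equivalent Python API
+usage; the ``tokens/`` directory and ``firmware.elf`` names are
+hypothetical:
+
+.. code-block:: python
+
+   from pathlib import Path
+
+   from pw_tokenizer import database, tokens
+
+   # Load the directory database and the tokens from a new build.
+   dir_db = tokens.DatabaseFile.create(Path('tokens/'))
+   elf_db = database.load_token_database('firmware.elf')
+
+   # Discard temporary entries relative to HEAD, add new entries, and
+   # write the result back to the directory database.
+   dir_db.add_and_discard_temporary(elf_db.entries(), 'HEAD')
+   dir_db.write_to_file()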
+
GN integration
^^^^^^^^^^^^^^
Token databases may be updated or created as part of a GN build. The
diff --git a/pw_tokenizer/py/database_test.py b/pw_tokenizer/py/database_test.py
index 0fff8f4..496f6f4 100755
--- a/pw_tokenizer/py/database_test.py
+++ b/pw_tokenizer/py/database_test.py
@@ -16,8 +16,11 @@
import json
import io
+import os
from pathlib import Path
import shutil
+import stat
+import subprocess
import sys
import tempfile
import unittest
@@ -158,6 +161,16 @@
return io.TextIOWrapper(output, write_through=True)
+def _remove_readonly(func, path, excinfo) -> None:  # pylint: disable=unused-argument
+    """Clears the read-only bit and retries the removal that failed."""
+    print('Path attempted to be deleted:', path)
+    if not os.access(path, os.W_OK):
+        # Clear the read-only bit.
+        os.chmod(path, stat.S_IWUSR)
+        # Retry the removal that previously failed.
+        func(path)
+
+
class DatabaseCommandLineTest(unittest.TestCase):
"""Tests the database.py command line interface."""
def setUp(self) -> None:
@@ -318,9 +331,10 @@
self._csv_test_domain = CSV_TEST_DOMAIN
def tearDown(self) -> None:
- shutil.rmtree(self._dir)
+ shutil.rmtree(self._dir, onerror=_remove_readonly)
def test_add_csv_to_dir(self) -> None:
+ """Tests a CSV can be created within the database."""
run_cli('add', '--database', self._db_dir, f'{self._elf}#TEST_DOMAIN')
directory = list(self._db_dir.iterdir())
@@ -332,6 +346,7 @@
self._db_csv.read_text().splitlines())
def test_add_all_domains_to_dir(self) -> None:
+ """Tests a CSV with all domains can be added to the database."""
run_cli('add', '--database', self._db_dir, f'{self._elf}#.*')
directory = list(self._db_dir.iterdir())
@@ -343,6 +358,7 @@
self._db_csv.read_text().splitlines())
def test_not_adding_existing_tokens(self) -> None:
+ """Tests duplicate tokens are not added to the database."""
run_cli('add', '--database', self._db_dir, f'{self._elf}#TEST_DOMAIN')
run_cli('add', '--database', self._db_dir, f'{self._elf}#TEST_DOMAIN')
directory = list(self._db_dir.iterdir())
@@ -354,6 +370,155 @@
self.assertEqual(self._csv_test_domain.splitlines(),
self._db_csv.read_text().splitlines())
+    def test_adding_tokens_without_git_repo(self) -> None:
+ """Tests creating new files with new entries when no repo exists."""
+ # Add CSV_TEST_DOMAIN to a new CSV in the directory database.
+ run_cli('add', '--database', self._db_dir, f'{self._elf}#TEST_DOMAIN')
+ directory = list(self._db_dir.iterdir())
+
+ self.assertEqual(1, len(directory))
+
+ first_csv_in_db = directory.pop()
+
+ self.assertEqual(self._csv_test_domain.splitlines(),
+ first_csv_in_db.read_text().splitlines())
+ # Add CSV_ALL_DOMAINS to a new CSV in the directory database.
+ run_cli('add', '--database', self._db_dir, f'{self._elf}#.*')
+ directory = list(self._db_dir.iterdir())
+ # Assert two different CSVs were created to store new tokens.
+ self.assertEqual(2, len(directory))
+ # Retrieve the other CSV in the directory.
+        second_csv_in_db = (directory[0] if directory[0] != first_csv_in_db
+                            else directory[1])
+
+ self.assertNotEqual(first_csv_in_db, second_csv_in_db)
+ self.assertEqual(self._csv_test_domain.splitlines(),
+ first_csv_in_db.read_text().splitlines())
+
+ # Retrieve entries that exclusively exist in CSV_ALL_DOMAINS
+ # as CSV_ALL_DOMAINS contains all entries in TEST_DOMAIN.
+ entries_exclusively_in_all_domain = (
+ set(CSV_ALL_DOMAINS.splitlines()) -
+ set(self._csv_test_domain.splitlines()))
+ # Ensure only new tokens not in CSV_TEST_DOMAIN were added to
+ # the second CSV added to the directory database.
+ self.assertEqual(entries_exclusively_in_all_domain,
+ set(second_csv_in_db.read_text().splitlines()))
+
+    def test_untracked_files_in_dir(self) -> None:
+ """Tests untracked CSVs are reused by the database."""
+ subprocess.run(['git', 'init'], cwd=self._dir)
+ # Add CSV_TEST_DOMAIN to a new CSV in the directory database.
+ run_cli('add', '--database', self._db_dir, '--discard-temporary',
+ 'HEAD', f'{self._elf}#TEST_DOMAIN')
+ directory = list(self._db_dir.iterdir())
+
+ self.assertEqual(1, len(directory))
+
+ first_path_in_db = directory.pop()
+
+ self.assertEqual(self._csv_test_domain.splitlines(),
+ first_path_in_db.read_text().splitlines())
+ # Retrieve the untracked CSV in the Git repository and discard
+ # tokens that do not exist in CSV_DEFAULT_DOMAIN.
+ run_cli('add', '--database', self._db_dir, '--discard-temporary',
+ 'HEAD', self._elf)
+ directory = list(self._db_dir.iterdir())
+
+ self.assertEqual(1, len(directory))
+
+ reused_path_in_db = directory.pop()
+        # Ensure the first path created is the one being reused and its
+        # content now matches CSV_DEFAULT_DOMAIN.
+ self.assertEqual(first_path_in_db, reused_path_in_db)
+ self.assertEqual(CSV_DEFAULT_DOMAIN.splitlines(),
+ reused_path_in_db.read_text().splitlines())
+
+ def test_adding_multiple_elf_files(self) -> None:
+ """Tests adding multiple elf files to a file in the database."""
+ # Add CSV_TEST_DOMAIN to a new CSV in the directory database.
+ run_cli('add', '--database', self._db_dir, f'{self._elf}#TEST_DOMAIN',
+ self._elf)
+ directory = list(self._db_dir.iterdir())
+
+ self.assertEqual(1, len(directory))
+ # Combines CSV_DEFAULT_DOMAIN and TEST_DOMAIN into a unique set
+ # of token entries.
+ entries_from_default_and_test_domain = set(
+ CSV_DEFAULT_DOMAIN.splitlines()).union(
+ set(self._csv_test_domain.splitlines()))
+ # Multiple ELF files were added at once to a single CSV.
+ self.assertEqual(entries_from_default_and_test_domain,
+ set(directory.pop().read_text().splitlines()))
+
+ def test_discarding_old_entries(self) -> None:
+ """Tests discarding old entries for new entries when re-adding."""
+ subprocess.run(['git', 'init'], cwd=self._dir)
+ # Add CSV_ALL_DOMAINS to a new CSV in the directory database.
+ run_cli('add', '--database', self._db_dir, '--discard-temporary',
+ 'HEAD', f'{self._elf}#.*')
+ directory = list(self._db_dir.iterdir())
+
+ self.assertEqual(1, len(directory))
+
+ untracked_path_in_db = directory.pop()
+
+ self.assertEqual(CSV_ALL_DOMAINS.splitlines(),
+ untracked_path_in_db.read_text().splitlines())
+ # Add CSV_DEFAULT_DOMAIN and CSV_TEST_DOMAIN to a CSV in the
+ # directory database, while replacing entries in CSV_ALL_DOMAINS
+ # that no longer exist.
+ run_cli('add', '--database', self._db_dir, '--discard-temporary',
+ 'HEAD', f'{self._elf}#TEST_DOMAIN', self._elf)
+ directory = list(self._db_dir.iterdir())
+
+ self.assertEqual(1, len(directory))
+
+ reused_path_in_db = directory.pop()
+ # Combines CSV_DEFAULT_DOMAIN and TEST_DOMAIN.
+ entries_from_default_and_test_domain = set(
+ CSV_DEFAULT_DOMAIN.splitlines()).union(
+ set(self._csv_test_domain.splitlines()))
+
+ self.assertEqual(untracked_path_in_db, reused_path_in_db)
+ self.assertEqual(entries_from_default_and_test_domain,
+ set(reused_path_in_db.read_text().splitlines()))
+
+ def test_retrieving_csv_from_commit(self) -> None:
+ """Tests retrieving a CSV from a commit and removing temp tokens."""
+ subprocess.run(['git', 'init'], cwd=self._dir)
+ subprocess.run(
+ ['git', 'commit', '--allow-empty', '-m', 'First Commit'],
+ cwd=self._dir)
+ # Add CSV_ALL_DOMAINS to a new CSV in the directory database.
+ run_cli('add', '--database', self._db_dir, f'{self._elf}#.*')
+ directory = list(self._db_dir.iterdir())
+
+ self.assertEqual(1, len(directory))
+
+ tracked_path_in_db = directory.pop()
+
+ self.assertEqual(CSV_ALL_DOMAINS.splitlines(),
+ tracked_path_in_db.read_text().splitlines())
+        # Commit the CSV so the untracked-file check does not retrieve it
+        # on the next run.
+ subprocess.run(['git', 'add', '--all'], cwd=self._dir)
+ subprocess.run(
+ ['git', 'commit', '-m', 'Adding a CSV to a new commit.'],
+ cwd=self._dir)
+ # Retrieve the CSV in HEAD and discard tokens that exist in
+ # CSV_ALL_DOMAINS and do not exist in CSV_TEST_DOMAIN.
+ run_cli('add', '--database', self._db_dir, '--discard-temporary',
+ 'HEAD~', f'{self._elf}#TEST_DOMAIN')
+ directory = list(self._db_dir.iterdir())
+
+ self.assertEqual(1, len(directory))
+
+ reused_path_in_db = directory.pop()
+
+ self.assertEqual(self._csv_test_domain.splitlines(),
+ reused_path_in_db.read_text().splitlines())
+
if __name__ == '__main__':
unittest.main()
diff --git a/pw_tokenizer/py/pw_tokenizer/database.py b/pw_tokenizer/py/pw_tokenizer/database.py
index 1d67042..0852732 100755
--- a/pw_tokenizer/py/pw_tokenizer/database.py
+++ b/pw_tokenizer/py/pw_tokenizer/database.py
@@ -21,6 +21,7 @@
import argparse
from datetime import datetime
import glob
+import itertools
import json
import logging
import os
@@ -323,18 +324,24 @@
def _handle_add(token_database: tokens.DatabaseFile,
- databases: List[tokens.Database]) -> None:
+ databases: List[tokens.Database],
+ commit: Optional[str]) -> None:
initial = len(token_database)
+ if commit:
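+        # Flatten the entries from every input database into one iterable.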
+ entries = itertools.chain.from_iterable(db.entries()
+ for db in databases)
+ token_database.add_and_discard_temporary(entries, commit)
+ else:
+ for source in databases:
+ token_database.add(source.entries())
- for source in databases:
- token_database.add(source.entries())
+ token_database.write_to_file()
- number_of_entries_added = len(token_database) - initial
+ number_of_changes = len(token_database) - initial
- token_database.write_to_file()
-
- _LOG.info('Added %d entries to %s', number_of_entries_added,
- token_database.path)
+ if number_of_changes:
+ _LOG.info('Added %d entries to %s', number_of_changes,
+ token_database.path)
def _handle_mark_removed(token_database: tokens.DatabaseFile,
@@ -564,6 +571,14 @@
'of ELF files or other token databases. Missing entries are NOT '
'marked as removed.'))
subparser.set_defaults(handler=_handle_add)
+    subparser.add_argument(
+        '--discard-temporary',
+        dest='commit',
+        metavar='COMMIT',
+        help=('Deletes temporary tokens in memory and on disk when a CSV '
+              'exists within the provided commit. Afterwards, new strings '
+              'are added to the database from a set of ELF files or other '
+              'token databases. Missing entries are NOT marked as removed.'))
# The 'mark_removed' command marks removed entries to match a set of ELFs.
subparser = subparsers.add_parser(
diff --git a/pw_tokenizer/py/pw_tokenizer/tokens.py b/pw_tokenizer/py/pw_tokenizer/tokens.py
index 9272ac6..ea75607 100644
--- a/pw_tokenizer/py/pw_tokenizer/tokens.py
+++ b/pw_tokenizer/py/pw_tokenizer/tokens.py
@@ -23,6 +23,7 @@
from pathlib import Path
import re
import struct
+import subprocess
from typing import (BinaryIO, Callable, Dict, Iterable, Iterator, List,
NamedTuple, Optional, Pattern, TextIO, Tuple, Union,
ValuesView)
@@ -487,6 +488,16 @@
def write_to_file(self) -> None:
"""Exports in the original format to the original path."""
+    @abstractmethod
+    def add_and_discard_temporary(self,
+                                  entries: Iterable[TokenizedStringEntry],
+                                  commit: str) -> None:
+        """Discards temporary entries, adds new ones, and exports.
+
+        Temporary entries are removed from the database before the incoming
+        entries are added, so only entries that recur across builds are
+        written to memory and disk in the original format.
+        """
+
class _BinaryDatabase(DatabaseFile):
def __init__(self, path: Path, fd: BinaryIO) -> None:
@@ -497,6 +508,13 @@
with self.path.open('wb') as fd:
write_binary(self, fd)
+ def add_and_discard_temporary(self,
+ entries: Iterable[TokenizedStringEntry],
+ commit: str) -> None:
+ # TODO(b/241471465): Implement adding new tokens and removing
+ # temporary entries for binary databases.
+ raise NotImplementedError
+
class _CSVDatabase(DatabaseFile):
def __init__(self, path: Path, fd: TextIO) -> None:
@@ -507,6 +525,13 @@
with self.path.open('wb') as fd:
write_csv(self, fd)
+ def add_and_discard_temporary(self,
+ entries: Iterable[TokenizedStringEntry],
+ commit: str) -> None:
+ # TODO(b/241471465): Implement adding new tokens and removing
+ # temporary entries for CSV databases.
+ raise NotImplementedError
+
def _parse_directory(paths: Iterable[Path]) -> Iterable[TokenizedStringEntry]:
"""Parses TokenizedStringEntries from files in the directory as a CSV."""
@@ -515,31 +540,137 @@
yield from parse_csv(fd)
+def _git_stdout(commands: List, cwd: Path) -> str:
+ """Runs the git commands in the provided cwd and captures the output."""
+ return subprocess.run(['git'] + commands,
+ capture_output=True,
+ check=True,
+ cwd=cwd,
+ text=True).stdout.strip()
+
+
+def _new_entries(
+ entries: Iterable[TokenizedStringEntry],
+ database: Dict[_EntryKey,
+ TokenizedStringEntry]) -> List[TokenizedStringEntry]:
+ """Retrieves the entries not in the database."""
+ return [entry for entry in entries if entry.key() not in database]
+
+
class _DirectoryDatabase(DatabaseFile):
def __init__(self, directory: Path) -> None:
- # Create a DatabaseFile using the directory.
super().__init__(directory, _parse_directory(directory.iterdir()))
- self._original_entries = self._database.copy()
- # TODO(b/239551346): Check whether a CSV exists in
- # HEAD. Exclude the CSV when loading the database.
- # Generate a unique filename not in the directory.
- self._csv_file = directory / f'{uuid4().hex}.csv'
- while self._csv_file.exists():
- self._csv_file = directory / f'{uuid4().hex}.csv'
def write_to_file(self) -> None:
"""Exports the database in CSV format to the original path."""
- new_entries = self._get_new_token_entries()
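+        # Re-parse the directory so only entries not yet on disk are written.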
+ database = Database(_parse_directory(self.path.iterdir()))._database # pylint: disable=protected-access
+ new_entries = _new_entries(self.entries(), database)
if new_entries:
- with self._csv_file.open('wb') as fd:
- for entry in new_entries:
- _write_csv_line(fd, entry)
+ with self._create_filename().open('wb') as fd:
+ write_csv(Database(new_entries), fd)
- def _get_new_token_entries(self) -> List[TokenizedStringEntry]:
- """Collects new entries and returns the total new entries found."""
- new_token_entries = []
- for entry in sorted(self.entries()):
- original_entry = self._original_entries.get(entry.key())
- if original_entry != entry:
- new_token_entries.append(entry)
- return new_token_entries
+ def _path_to_repo(self) -> Path:
+ """Creates a path to the repository the database is within."""
+ return Path(_git_stdout(['rev-parse', '--show-toplevel'],
+ self.path)).resolve()
+
+ def _git_paths(self, commands: List, path_to_repo: Path) -> List[Path]:
+ """Creates a list of complete paths to the repo."""
+ try:
+ changes = _git_stdout(commands + [self.path],
+ path_to_repo).splitlines()
+ return [path_to_repo / path for path in changes]
+ except subprocess.CalledProcessError:
+ return []
+
+ def _find_latest_csv(self, commit: str) -> Path:
+ """Finds or creates a CSV to utilize for new entries.
+
+ - Searches for untracked files in the repo to re-use.
+ - When no untracked files exists, a difference between commit
+ and HEAD searches for a filename in the commit to re-use.
+ - Multiple differences between HEAD~ and HEAD causes a search
+ for a CSV in the latest commit.
+ - When nothing is yielded from the previous searches or no git repo
+ exists, then a new filename is created.
+ """
+ # Finds the repository the database is within.
+ try:
+ path_to_repo = self._path_to_repo()
+ except subprocess.CalledProcessError:
+ return self._create_filename()
+
+        # Find untracked changes in the database directory.
+ untracked_changes = self._git_paths(
+ ['ls-files', '--others', '--exclude-standard'], path_to_repo)
+
+ if untracked_changes:
+ # TODO(b/241297219): Handle multiple paths in the commit
+ # There are uncommitted files in the repo.
+ assert len(untracked_changes) < 2
+ return untracked_changes.pop()
+
+        # TODO(b/241297219): Add an argument to specify which commit to
+        # run the git diff against.
+        # Find files added since the provided commit in the database
+        # directory.
+ tracked_changes = self._git_paths(
+ ['diff', '--name-only', '--diff-filter=A', commit], path_to_repo)
+
+ if tracked_changes:
+            # If exactly one file was added in the comparison, reuse that
+            # filename. Otherwise, when multiple files were added, fall
+            # back to the latest commit (HEAD~) to retrieve a CSV.
+ if len(tracked_changes) == 1:
+ return tracked_changes.pop()
+
+ tracked_changes_from_latest_commit = self._git_paths(
+ ['diff', '--name-only', '--diff-filter=A', 'HEAD~'],
+ path_to_repo)
+ # TODO(b/241297219): Handle multiple paths in the commit.
+ assert len(tracked_changes_from_latest_commit) < 2
+ return tracked_changes_from_latest_commit.pop()
+
+ return self._create_filename()
+
+ def _create_filename(self) -> Path:
+ """Generates a unique filename not in the directory."""
+        # No reusable tracked or untracked CSV exists; generate a new name.
+ path = self.path / f'{uuid4().hex}.csv'
+ while path.exists():
+ path = self.path / f'{uuid4().hex}.csv'
+ return path
+
+ def add_and_discard_temporary(self,
+ entries: Iterable[TokenizedStringEntry],
+ commit: str) -> None:
+ """Adds and updates entries to export in the CSV format.
+
+ - New entries are collected from the incoming entries.
+ - When no CSV exists, the new entries are loaded into a database and
+ written to disk.
+ - When a CSV exists and incoming entries do not exist within the CSV
+ database, the temporary entries are removed from the directory and
+ CSV database.
+ - New entries are added to the CSV database and re-written to disk.
+ """
+ db = Database(entries)._database # pylint: disable=protected-access
+ new_entries = _new_entries(db.values(), self._database)
+ csv_path = self._find_latest_csv(commit)
+
+ if csv_path.exists():
+ # Loading the CSV as a DatabaseFile.
+ csv_db = DatabaseFile.create(csv_path)
+ # Collects the keys to delete from the CSV database.
+ keys_to_delete = (e.key()
+ for e in _new_entries(csv_db.entries(), db))
+ for key in keys_to_delete:
+ del self._database[key]
+ del csv_db._database[key] # pylint: disable=protected-access
+ csv_db.add(new_entries)
+ csv_db.write_to_file()
+ elif new_entries:
+ with csv_path.open('wb') as fd:
+ write_csv(Database(new_entries), fd)
+
+ self.add(new_entries)
diff --git a/pw_tokenizer/py/tokens_test.py b/pw_tokenizer/py/tokens_test.py
index 292043d..a22ae04 100755
--- a/pw_tokenizer/py/tokens_test.py
+++ b/pw_tokenizer/py/tokens_test.py
@@ -594,8 +594,8 @@
self._db_csv.write_text(CSV_DATABASE)
csv = tokens.DatabaseFile.create(self._db_csv)
directory_db = database.load_token_database(self._db_dir)
- self.assertEqual(str(csv), str(directory_db))
self.assertEqual(1, len(list(self._db_dir.iterdir())))
+ self.assertEqual(str(csv), str(directory_db))
def test_loading_multiples_files(self) -> None:
self._db_csv.write_text(CSV_DATABASE_3)
@@ -612,8 +612,8 @@
all_databases_merged = tokens.Database.merged(first_csv, second_csv,
third_csv)
directory_db = database.load_token_database(self._db_dir)
- self.assertEqual(str(all_databases_merged), str(directory_db))
self.assertEqual(3, len(list(self._db_dir.iterdir())))
+ self.assertEqual(str(all_databases_merged), str(directory_db))
def test_loading_multiples_files_with_removal_dates(self) -> None:
self._db_csv.write_text(CSV_DATABASE)
@@ -630,9 +630,8 @@
all_databases_merged = tokens.Database.merged(first_csv, second_csv,
third_csv)
directory_db = database.load_token_database(self._db_dir)
-
- self.assertEqual(str(all_databases_merged), str(directory_db))
self.assertEqual(3, len(list(self._db_dir.iterdir())))
+ self.assertEqual(str(all_databases_merged), str(directory_db))
if __name__ == '__main__':