blob: 74cf9951d5e83c173fc11ec8d52cef6d61b00f41 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Tests for the database module."""
import json
import io
from pathlib import Path
import shutil
import sys
import tempfile
import unittest
from unittest import mock
from pw_tokenizer import database
# This is an ELF file with only the pw_tokenizer sections. It was created
# from a tokenize_test binary built for the STM32F429i Discovery board. The
# pw_tokenizer sections were extracted with this command:
#
# arm-none-eabi-objcopy -S --only-section ".pw_tokenize*" <ELF> <OUTPUT>
#
TOKENIZED_ENTRIES_ELF = Path(
__file__).parent / 'example_binary_with_tokenized_strings.elf'
LEGACY_PLAIN_STRING_ELF = Path(
__file__).parent / 'example_legacy_binary_with_tokenized_strings.elf'
CSV_DEFAULT_DOMAIN = '''\
00000000, ,""
141c35d5, ,"The answer: ""%s"""
29aef586, ,"1234"
2b78825f, ,"[:-)"
2e668cd6, ,"Jello, world!"
31631781, ,"%d"
61fd1e26, ,"%ld"
68ab92da, ,"%s there are %x (%.2f) of them%c"
7b940e2a, ,"Hello %s! %hd %e"
7da55d52, ,">:-[]"
7f35a9a5, ,"TestName"
851beeb6, ,"%u %d"
881436a0, ,"The answer is: %s"
88808930, ,"%u%d%02x%X%hu%hhd%d%ld%lu%lld%llu%c%c%c"
92723f44, ,"???"
a09d6698, ,"won-won-won-wonderful"
aa9ffa66, ,"void pw::tokenizer::{anonymous}::TestName()"
ad002c97, ,"%llx"
b3653e13, ,"Jello!"
cc6d3131, ,"Jello?"
e13b0f94, ,"%llu"
e65aefef, ,"Won't fit : %s%d"
'''
CSV_TEST_DOMAIN = """\
17fa86d3, ,"hello"
18c5017c, ,"yes"
59b2701c, ,"The answer was: %s"
881436a0, ,"The answer is: %s"
d18ada0f, ,"something"
"""
CSV_ALL_DOMAINS = '''\
00000000, ,""
141c35d5, ,"The answer: ""%s"""
17fa86d3, ,"hello"
18c5017c, ,"yes"
29aef586, ,"1234"
2b78825f, ,"[:-)"
2e668cd6, ,"Jello, world!"
31631781, ,"%d"
59b2701c, ,"The answer was: %s"
61fd1e26, ,"%ld"
68ab92da, ,"%s there are %x (%.2f) of them%c"
7b940e2a, ,"Hello %s! %hd %e"
7da55d52, ,">:-[]"
7f35a9a5, ,"TestName"
851beeb6, ,"%u %d"
881436a0, ,"The answer is: %s"
88808930, ,"%u%d%02x%X%hu%hhd%d%ld%lu%lld%llu%c%c%c"
92723f44, ,"???"
a09d6698, ,"won-won-won-wonderful"
aa9ffa66, ,"void pw::tokenizer::{anonymous}::TestName()"
ad002c97, ,"%llx"
b3653e13, ,"Jello!"
cc6d3131, ,"Jello?"
d18ada0f, ,"something"
e13b0f94, ,"%llu"
e65aefef, ,"Won't fit : %s%d"
'''
JSON_SOURCE_STRINGS = '''\
[
"pigweed/pw_polyfill/standard_library_public/pw_polyfill/standard_library/assert.h",
"protocol_buffer/gen/pigweed/pw_protobuf/common_protos.proto_library/nanopb/pw_protobuf_protos/status.pb.h",
"pigweed/pw_rpc/client_server.cc",
"pigweed/pw_rpc/public/pw_rpc/client_server.h",
"This is a very long string that will produce two tokens; one for C++ and one for C. This is because this string exceeds the default C hash length."
]
'''
CSV_STRINGS = '''\
2cbf627a, ,"pigweed/pw_rpc/client_server.cc"
666562a1, ,"protocol_buffer/gen/pigweed/pw_protobuf/common_protos.proto_library/nanopb/pw_protobuf_protos/status.pb.h"
6c1e6eb3, ,"pigweed/pw_rpc/public/pw_rpc/client_server.h"
b25a9932, ,"This is a very long string that will produce two tokens; one for C++ and one for C. This is because this string exceeds the default C hash length."
eadf017f, ,"pigweed/pw_polyfill/standard_library_public/pw_polyfill/standard_library/assert.h"
f815dc5c, ,"This is a very long string that will produce two tokens; one for C++ and one for C. This is because this string exceeds the default C hash length."
'''
EXPECTED_REPORT = {
str(TOKENIZED_ENTRIES_ELF): {
'': {
'present_entries': 22,
'present_size_bytes': 289,
'total_entries': 22,
'total_size_bytes': 289,
'collisions': {}
},
'TEST_DOMAIN': {
'present_entries': 5,
'present_size_bytes': 57,
'total_entries': 5,
'total_size_bytes': 57,
'collisions': {}
}
}
}
def run_cli(*args) -> None:
original_argv = sys.argv
sys.argv = ['database.py', *(str(a) for a in args)]
# pylint: disable=protected-access
try:
database._main(*database._parse_args())
finally:
# Remove the log handler added by _main to avoid duplicate logs.
if database._LOG.handlers:
database._LOG.handlers.pop()
# pylint: enable=protected-access
sys.argv = original_argv
def _mock_output() -> io.TextIOWrapper:
output = io.BytesIO()
output.name = '<fake stdout>'
return io.TextIOWrapper(output, write_through=True)
class DatabaseCommandLineTest(unittest.TestCase):
"""Tests the database.py command line interface."""
def setUp(self):
self._dir = Path(tempfile.mkdtemp('_pw_tokenizer_test'))
self._csv = self._dir / 'db.csv'
self._elf = TOKENIZED_ENTRIES_ELF
self._csv_test_domain = CSV_TEST_DOMAIN
def tearDown(self):
shutil.rmtree(self._dir)
def test_create_csv(self):
run_cli('create', '--database', self._csv, self._elf)
self.assertEqual(CSV_DEFAULT_DOMAIN.splitlines(),
self._csv.read_text().splitlines())
def test_create_csv_test_domain(self):
run_cli('create', '--database', self._csv, f'{self._elf}#TEST_DOMAIN')
self.assertEqual(self._csv_test_domain.splitlines(),
self._csv.read_text().splitlines())
def test_create_csv_all_domains(self):
run_cli('create', '--database', self._csv, f'{self._elf}#.*')
self.assertEqual(CSV_ALL_DOMAINS.splitlines(),
self._csv.read_text().splitlines())
def test_create_force(self):
self._csv.write_text(CSV_ALL_DOMAINS)
with self.assertRaises(FileExistsError):
run_cli('create', '--database', self._csv, self._elf)
run_cli('create', '--force', '--database', self._csv, self._elf)
def test_create_binary(self):
binary = self._dir / 'db.bin'
run_cli('create', '--type', 'binary', '--database', binary, self._elf)
# Write the binary database as CSV to verify its contents.
run_cli('create', '--database', self._csv, binary)
self.assertEqual(CSV_DEFAULT_DOMAIN.splitlines(),
self._csv.read_text().splitlines())
def test_add_does_not_recalculate_tokens(self):
db_with_custom_token = '01234567, ,"hello"'
to_add = self._dir / 'add_this.csv'
to_add.write_text(db_with_custom_token + '\n')
self._csv.touch()
run_cli('add', '--database', self._csv, to_add)
self.assertEqual(db_with_custom_token.splitlines(),
self._csv.read_text().splitlines())
def test_mark_removed(self):
self._csv.write_text(CSV_ALL_DOMAINS)
run_cli('mark_removed', '--database', self._csv, '--date',
'1998-09-04', self._elf)
# Add the removal date to the four tokens not in the default domain
new_csv = CSV_ALL_DOMAINS
new_csv = new_csv.replace('17fa86d3, ,"hello"',
'17fa86d3,1998-09-04,"hello"')
new_csv = new_csv.replace('18c5017c, ,"yes"',
'18c5017c,1998-09-04,"yes"')
new_csv = new_csv.replace('59b2701c, ,"The answer was: %s"',
'59b2701c,1998-09-04,"The answer was: %s"')
new_csv = new_csv.replace('d18ada0f, ,"something"',
'd18ada0f,1998-09-04,"something"')
self.assertNotEqual(CSV_ALL_DOMAINS, new_csv)
self.assertEqual(new_csv.splitlines(),
self._csv.read_text().splitlines())
def test_purge(self):
self._csv.write_text(CSV_ALL_DOMAINS)
# Mark everything not in TEST_DOMAIN as removed.
run_cli('mark_removed', '--database', self._csv,
f'{self._elf}#TEST_DOMAIN')
# Delete all entries except those in TEST_DOMAIN.
run_cli('purge', '--database', self._csv)
self.assertEqual(self._csv_test_domain.splitlines(),
self._csv.read_text().splitlines())
@mock.patch('sys.stdout', new_callable=_mock_output)
def test_report(self, mock_stdout):
run_cli('report', self._elf)
self.assertEqual(json.loads(mock_stdout.buffer.getvalue()),
EXPECTED_REPORT)
def test_replace(self):
sub = 'replace/ment'
run_cli('create', '--database', self._csv, self._elf, '--replace',
r'(?i)\b[jh]ello\b/' + sub)
self.assertEqual(
CSV_DEFAULT_DOMAIN.replace('Jello', sub).replace('Hello', sub),
self._csv.read_text())
def test_json_strings(self):
strings_file = self._dir / "strings.json"
with open(strings_file, 'w') as file:
file.write(JSON_SOURCE_STRINGS)
run_cli('create', '--force', '--database', self._csv, strings_file)
self.assertEqual(CSV_STRINGS.splitlines(),
self._csv.read_text().splitlines())
class LegacyDatabaseCommandLineTest(DatabaseCommandLineTest):
"""Test an ELF with the legacy plain string storage format."""
def setUp(self):
super().setUp()
self._elf = LEGACY_PLAIN_STRING_ELF
# The legacy approach for storing tokenized strings in an ELF always
# adds an entry for "", even if the empty string was never tokenized.
self._csv_test_domain = '00000000, ,""\n' + CSV_TEST_DOMAIN
@mock.patch('sys.stdout', new_callable=_mock_output)
def test_report(self, mock_stdout):
run_cli('report', self._elf)
report = EXPECTED_REPORT[str(TOKENIZED_ENTRIES_ELF)].copy()
# Count the implicitly added "" entry in TEST_DOMAIN.
report['TEST_DOMAIN']['present_entries'] += 1
report['TEST_DOMAIN']['present_size_bytes'] += 1
report['TEST_DOMAIN']['total_entries'] += 1
report['TEST_DOMAIN']['total_size_bytes'] += 1
# Rename "" to the legacy name "default"
report['default'] = report['']
del report['']
self.assertEqual({str(LEGACY_PLAIN_STRING_ELF): report},
json.loads(mock_stdout.buffer.getvalue()))
if __name__ == '__main__':
unittest.main()