| #!/usr/bin/env python3 |
| # Copyright 2020 The Pigweed Authors |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| # use this file except in compliance with the License. You may obtain a copy of |
| # the License at |
| # |
| # https://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| # License for the specific language governing permissions and limitations under |
| # the License. |
| """Generates test data for hash_test.cc.""" |
| |
| import datetime |
| import os |
| import random |
| |
| from pw_tokenizer import tokens |
| |
# Fixed hash lengths for which macro #includes and test cases are generated.
HASH_LENGTHS = (80, 96, 128)

# Name template of the C++ hash macro; format it with a hash length.
HASH_MACRO = 'PW_TOKENIZER_65599_FIXED_LENGTH_{}_HASH'
| |
# Opening text of the generated C++ header: license, #includes, and the start
# of the kHashTests array. Rendered with str.format() using the fields {year},
# {script} and {includes}; literal C++ braces are therefore doubled.
FILE_HEADER = """\
// Copyright {year} The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

// AUTOGENERATED - DO NOT EDIT
//
// This file was generated by {script}.
// To make changes, update the script and run it to generate new files.
#pragma once

#include <cstddef>
#include <cstdint>
#include <string_view>

{includes}

namespace pw::tokenizer {{

// Test a series of generated test cases.
inline constexpr struct {{
std::string_view string;
size_t hash_length;
uint32_t python_calculated_hash;
uint32_t macro_calculated_hash; // clang-format off
}} kHashTests[] = {{

"""
| |
# Closing text of the generated header: ends the kHashTests array and the
# pw::tokenizer namespace. It is written verbatim (never passed through
# str.format()), so its braces are not doubled.
FILE_FOOTER = """
}; // kHashTests

// clang-format on

} // namespace pw::tokenizer
"""
| |
# One kHashTests entry. Rendered with str.format() using the fields {str},
# {string_length}, {hash_length}, {hash} and {macro}. {str} is used twice: as
# the string_view contents and as the argument of the hash macro.
_TEST_CASE = """{{
std::string_view("{str}", {string_length}u),
{hash_length}u, // fixed hash length
UINT32_C({hash}), // Python-calculated hash
{macro}("{str}"), // macro-calculated hash
}},
"""
| |
| |
| def _include_paths(lengths): |
| return '\n'.join( |
| sorted( |
| '#include "pw_tokenizer/internal/' |
| 'pw_tokenizer_65599_fixed_length_{}_hash_macro.h"'.format(length) |
| for length in lengths)) |
| |
| |
def _test_case_at_length(data, hash_length):
    """Generates a test case for a particular hash length.

    Args:
      data: the string to hash, as str or bytes; a str is converted with
          str.encode() before hashing
      hash_length: the fixed hash length, passed to the Python hash function
          and used to pick the C++ hash macro

    Returns:
      One array entry rendered from the _TEST_CASE template.
    """

    if isinstance(data, str):
        data = data.encode()

    if all(ord(' ') <= b <= ord('~') for b in data):
        # Printable ASCII is emitted as-is inside a C++ string literal, so
        # both backslashes and double quotes need escaping. Backslashes go
        # first; otherwise the backslash added for each quote would itself
        # be doubled.
        escaped_str = data.decode().replace('\\', r'\\').replace('"', r'\"')
    else:
        # Otherwise every byte is written as a \xNN escape. Escaping all of
        # the bytes means each hex escape is ended by the next backslash.
        escaped_str = ''.join(r'\x{:02x}'.format(b) for b in data)

    return _TEST_CASE.format(str=escaped_str,
                             string_length=len(data),
                             hash_length=hash_length,
                             hash=tokens.pw_tokenizer_65599_hash(
                                 data, hash_length),
                             macro=HASH_MACRO.format(hash_length))
| |
| |
def test_case(data):
    """Generates the test cases for one string at every hash length.

    Args:
      data: the string to hash, as str or bytes

    Returns:
      The array entries for each length in HASH_LENGTHS, concatenated.
    """
    # Use HASH_LENGTHS rather than repeating the literal lengths: the macro
    # #includes are generated from the same constant, so the two stay in sync.
    return ''.join(
        _test_case_at_length(data, length) for length in HASH_LENGTHS)
| |
| |
def generate_test_cases():
    """Yields the generated test case text, one str per input string.

    Hand-picked strings come first, followed by pseudo-random byte strings:
    two of each size from 1 to 15, then sizes just below, at, and just above
    each length in HASH_LENGTHS.
    """
    yield test_case('')
    yield test_case(b'\xa1')
    yield test_case(b'\xff')
    yield test_case('\0')
    yield test_case('\0\0')
    yield test_case('a')
    yield test_case('A')
    yield test_case('hello, "world"')
    yield test_case('YO' * 100)

    # A fixed seed keeps the generated data the same on every run. A private
    # Random instance gives the same sequence as seeding the module-level
    # generator, without resetting the caller's global random state.
    rng = random.Random(600613)

    def random_string(size):
        """Returns `size` pseudo-random bytes."""
        return bytes(rng.randrange(256) for _ in range(size))

    for i in range(1, 16):
        yield test_case(random_string(i))
        yield test_case(random_string(i))

    for length in HASH_LENGTHS:
        yield test_case(random_string(length - 1))
        yield test_case(random_string(length))
        yield test_case(random_string(length + 1))
| |
| |
if __name__ == '__main__':
    # The output header lives in ../pw_tokenizer_private relative to this
    # script's directory.
    script_dir = os.path.dirname(__file__)
    path = os.path.realpath(
        os.path.join(script_dir, '..', 'pw_tokenizer_private',
                     'generated_hash_test_cases.h'))

    with open(path, 'w') as output:
        header = FILE_HEADER.format(year=datetime.date.today().year,
                                    script=os.path.basename(__file__),
                                    includes=_include_paths(HASH_LENGTHS))
        output.write(header)

        # Each generated item is already a complete chunk of text.
        output.writelines(generate_test_cases())

        output.write(FILE_FOOTER)

    print('Wrote test data to', path)