Support non-ASCII characters in headers
Filter out non-ASCII characters in automatically processed headers.
Do this in a way that minimizes the code change: keep manipulating
strings internally, but open the headers in binary mode and strip
non-ASCII bytes from each line before decoding it as ASCII. This should
only remove characters in comments, which we don't parse anyway.
diff --git a/scripts/generate_psa_constants.py b/scripts/generate_psa_constants.py
index c2d2558..a9de148 100755
--- a/scripts/generate_psa_constants.py
+++ b/scripts/generate_psa_constants.py
@@ -270,11 +270,16 @@
# Other macro without parameter
return
+ _nonascii_re = re.compile(rb'[^\x00-\x7f]+')
+ _continued_line_re = re.compile(rb'\\\r?\n\Z')
def read_file(self, header_file):
for line in header_file:
- while line.endswith('\\\n'):
+ m = re.search(self._continued_line_re, line)
+ while m:
cont = next(header_file)
- line = line[:-2] + cont
+ line = line[:m.start(0)] + cont
+ m = re.search(self._continued_line_re, line)
+ line = re.sub(self._nonascii_re, rb'', line).decode('ascii')
self.read_line(line)
@staticmethod
@@ -380,7 +385,7 @@
def generate_psa_constants(header_file_names, output_file_name):
collector = MacroCollector()
for header_file_name in header_file_names:
- with open(header_file_name) as header_file:
+ with open(header_file_name, 'rb') as header_file:
collector.read_file(header_file)
temp_file_name = output_file_name + '.tmp'
with open(temp_file_name, 'w') as output_file: