| """Helper functions to parse C code in heavily constrained scenarios. |
| |
| Currently supported functionality: |
| |
| * read_function_declarations: read function declarations from a header file. |
| """ |
| |
| # Copyright The Mbed TLS Contributors |
| # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later |
| |
| ### WARNING: the code in this file has not been extensively reviewed yet. |
| ### We do not think it is harmful, but it may be below our normal standards |
| ### for robustness and maintainability. |
| |
| import re |
| from typing import Dict, Iterable, Iterator, List, Optional, Tuple |
| |
| |
class ArgumentInfo:
    """Information about an argument to an API function.

    Attributes:
        decl: The declaration as passed to the constructor, stripped of
            leading and trailing whitespace.
        type: The type part of the declaration, with whitespace normalized
            by normalize_type().
        name: The argument name, or None if the declaration has no name
            (for example ``unsigned int``).
        suffix: The array suffix including its brackets (for example
            ``[16]``), or an empty string if there is none.
    """
    #pylint: disable=too-few-public-methods

    # Words that belong to the type and must never be taken for the
    # argument name when they are the last word of a declaration.
    _KEYWORDS = [
        'const', 'register', 'restrict',
        'int', 'long', 'short', 'signed', 'unsigned',
    ]
    # <type> [<name>] [<array suffix>]
    # The negative lookahead refuses a keyword as the name. The \b that
    # closes the lookahead restricts it to whole words, so identifiers that
    # merely start with a keyword ("interval", "constant", "long_value")
    # are still recognized as names.
    _DECLARATION_RE = re.compile(
        r'(?P<type>\w[\w\s*]*?)\s*' +
        r'(?!(?:' + r'|'.join(_KEYWORDS) + r')\b)(?P<name>\b\w+\b)?' +
        r'\s*(?P<suffix>\[[^][]*\])?\Z',
        re.A | re.S)

    @classmethod
    def normalize_type(cls, typ: str) -> str:
        """Normalize whitespace in a type.

        Each run of whitespace becomes a single space, and every ``*`` ends
        up preceded by exactly one space.
        """
        typ = re.sub(r'\s+', r' ', typ)
        typ = re.sub(r'\s*\*', r' *', typ)
        return typ

    def __init__(self, decl: str) -> None:
        """Parse the declaration of one function argument.

        Raises:
            ValueError: If decl does not look like an argument declaration.
                The exception argument is the stripped declaration.
        """
        self.decl = decl.strip()
        m = self._DECLARATION_RE.match(self.decl)
        if not m:
            raise ValueError(self.decl)
        self.type = self.normalize_type(m.group('type')) #type: str
        self.name = m.group('name') #type: Optional[str]
        # The suffix group is None when there are no brackets.
        self.suffix = m.group('suffix') if m.group('suffix') else '' #type: str
| |
| |
class FunctionInfo:
    """Information about an API function.

    Attributes:
        filename: Name of the file containing the declaration.
        line_number: Line number associated with the declaration.
        qualifiers: Frozen set of the qualifiers passed to the constructor.
        return_type: Return type, as written in the declaration.
        name: Function name.
        arguments: One ArgumentInfo per declared argument, in order.
    """
    #pylint: disable=too-few-public-methods

    # Regex matching the declaration of a function that returns void.
    # It is anchored at the end only: use search() to test a return type
    # and match() to test a whole parameter list.
    VOID_RE = re.compile(r'\s*\bvoid\s*\Z', re.A)

    def __init__(self, #pylint: disable=too-many-arguments
                 filename: str,
                 line_number: int,
                 qualifiers: Iterable[str],
                 return_type: str,
                 name: str,
                 arguments: List[str]) -> None:
        """Record a function declaration.

        Each element of arguments is the text of one argument declaration.
        It is parsed with ArgumentInfo, so a ValueError raised there
        propagates to the caller.
        """
        self.filename = filename
        self.line_number = line_number
        # A set: duplicates and ordering of qualifiers are not preserved.
        self.qualifiers = frozenset(qualifiers)
        self.return_type = return_type
        self.name = name
        self.arguments = [ArgumentInfo(arg) for arg in arguments]

    def returns_void(self) -> bool:
        """Whether the function returns void.

        This is true only if the return type ends with the word ``void``,
        so a pointer to void does not count.
        """
        return bool(self.VOID_RE.search(self.return_type))
| |
| |
# Match one C comment.
# Note that we match both comment types, so things like // in a /*...*/
# comment are handled correctly.
# In a // comment, backslash-newline must be tried before "any character
# except newline": otherwise the backslash is consumed on its own and the
# comment stops at the newline instead of continuing on the next line.
_C_COMMENT_RE = re.compile(r'//(?:\\\n|[^\n])*|/\*.*?\*/', re.S)
_NOT_NEWLINES_RE = re.compile(r'[^\n]+')

def read_logical_lines(filename: str) -> Iterator[Tuple[int, str]]:
    """Read logical lines from a file.

    Logical lines are one or more physical lines, with balanced parentheses.
    Comments are removed before the lines are assembled.

    Yield pairs (line_number, text) where line_number is the 1-based number
    of the first physical line and text is the physical lines joined with
    newline characters.

    If the parentheses are still unbalanced at the end of the file, the
    last logical line extends to the end of the file.
    """
    with open(filename, encoding='utf-8') as inp:
        content = inp.read()
    # Strip comments, but keep newlines for line numbering
    content = _C_COMMENT_RE.sub(
        lambda m: _NOT_NEWLINES_RE.sub("", m.group(0)),
        content)
    numbered_lines = enumerate(content.splitlines(), 1)
    for line_number, line in numbered_lines:
        # Read a logical line, containing balanced parentheses.
        # We assume that parentheses are balanced (this should be ok
        # since comments have been stripped), otherwise there will be
        # a gigantic logical line at the end.
        paren_level = line.count('(') - line.count(')')
        if paren_level > 0:
            parts = [line]
            # The inner loop consumes lines from the same iterator as the
            # outer loop. If it runs out of lines, it just ends: calling
            # next() directly would raise StopIteration inside a generator,
            # which Python turns into a RuntimeError.
            for _, more in numbered_lines:
                parts.append(more)
                paren_level += more.count('(') - more.count(')')
                if paren_level <= 0:
                    break
            line = '\n'.join(parts)
        yield line_number, line
| |
# Match the declaration of one function, terminated by a semicolon.
# Groups:
# * qualifiers: any sequence of extern, inline and static, possibly empty.
# * return_type: everything between the qualifiers and the function name.
# * name: the last word before the opening parenthesis.
# * arguments: the text between the outermost parentheses. With re.S it
#   may span several physical lines.
_C_FUNCTION_DECLARATION_RE = re.compile(
    r'(?P<qualifiers>(?:(?:extern|inline|static)\b\s*)*)'
    r'(?P<return_type>\w[\w\s*]*?)\s*'
    r'\b(?P<name>\w+)'
    r'\s*\((?P<arguments>.*)\)\s*;',
    re.A | re.S)
| |
def read_function_declarations(functions: Dict[str, FunctionInfo],
                               filename: str) -> None:
    """Collect function declarations from a C header file.

    Read filename and, for each logical line that starts with a function
    declaration, store a FunctionInfo object in functions under the
    function's name. Logical lines that do not match are skipped.

    A parameter list that is empty or consists of the single word ``void``
    is recorded as an empty argument list.

    Raises:
        ValueError: If an argument of a matched declaration cannot be
            parsed by ArgumentInfo.
    """
    for line_number, line in read_logical_lines(filename):
        m = _C_FUNCTION_DECLARATION_RE.match(line)
        if not m:
            continue
        qualifiers = m.group('qualifiers').split()
        return_type = m.group('return_type')
        name = m.group('name')
        arguments_text = m.group('arguments')
        arguments = arguments_text.split(',')
        # "f(void)" declares no arguments. "f()" has no argument to parse
        # either: splitting its empty parameter list would yield one empty
        # string, which ArgumentInfo rejects with ValueError.
        if (not arguments_text.strip() or
                FunctionInfo.VOID_RE.match(arguments_text)):
            arguments = []
        # Note: we replace any existing declaration for the same name.
        functions[name] = FunctionInfo(filename, line_number,
                                       qualifiers,
                                       return_type,
                                       name,
                                       arguments)