diff options
author | Florian Weimer <fweimer@redhat.com> | 2022-09-22 12:10:41 +0200 |
---|---|---|
committer | Florian Weimer <fweimer@redhat.com> | 2022-09-22 12:10:41 +0200 |
commit | e6e6184bed490403811771fa527eb95b4ae53c7c (patch) | |
tree | d109a6b06b8f2a868cf5333ec86152199d13a5d2 /scripts | |
parent | f40c7887d3cc9bb0b56576ed9edbe505ff8058c0 (diff) | |
download | glibc-e6e6184bed490403811771fa527eb95b4ae53c7c.zip glibc-e6e6184bed490403811771fa527eb95b4ae53c7c.tar.gz glibc-e6e6184bed490403811771fa527eb95b4ae53c7c.tar.bz2 |
scripts: Enhance glibcpp to do basic macro processing
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/glibcpp.py | 317 |
1 files changed, 317 insertions, 0 deletions
# Additions to scripts/glibcpp.py from this commit (diff index
# b44c6a4..455459a), reflowed from the flattened diff view.  In the
# full file these definitions follow tokenize_c; the same commit also
# adds the operator and sys imports listed below.

import collections
import operator
import re
import sys


class MacroDefinition(collections.namedtuple('MacroDefinition',
                                             'name_token args body error')):
    """A preprocessor macro definition.

    name_token is the Token_ for the name.

    args is None for a macro that is not function-like.  Otherwise, it
    is a tuple that contains the macro argument name tokens.

    body is a tuple that contains the tokens that constitute the body
    of the macro definition (excluding whitespace).

    error is None if no error was detected, or otherwise a problem
    description associated with this macro definition.

    """

    @property
    def function(self):
        """Return true if the macro is function-like."""
        return self.args is not None

    @property
    def name(self):
        """Return the name of the macro being defined."""
        return self.name_token.text

    @property
    def line(self):
        """Return the line number of the macro definition."""
        return self.name_token.line

    @property
    def args_lowered(self):
        """Return the macro argument list as a list of strings.

        The result is None for a macro that is not function-like.

        """
        if self.function:
            return [token.text for token in self.args]
        return None

    @property
    def body_lowered(self):
        """Return the macro body as a list of strings."""
        return [token.text for token in self.body]


# Token kinds that carry no meaning inside a macro definition.
_SKIPPED_KINDS = ('WHITESPACE', 'BLOCK_COMMENT')


def _finish_definition(name_token, args, body, in_args, error):
    """Build the MacroDefinition for a definition that just ended."""
    if in_args and error is None:
        error = 'macro definition ends in macro argument list'
    if args is not None:
        # Always hand out a tuple, even if the closing ')' was missing.
        args = tuple(args)
    return MacroDefinition(name_token, args, tuple(body), error)


def macro_definitions(tokens):
    """A generator for C macro definitions among tokens.

    The generator yields MacroDefinition objects.

    tokens must be iterable, yielding Token_ objects.  Only tokens
    whose context attribute is 'define' are considered part of a macro
    definition; the first such identifier is taken to be the define
    keyword itself and the second one the macro name.

    Problems in the macro argument list are not raised.  They are
    recorded in the error attribute of the yielded object (only the
    first problem per definition is kept).

    """

    macro_name = None
    # True from the define keyword until one token after the macro name.
    macro_start = False
    macro_args = None      # Set to a list during the macro argument sequence.
    in_macro_args = False  # True while processing macro identifier-list.
    error = None
    body = []

    for token in tokens:
        in_define = token.context == 'define'

        if macro_name is None:
            if in_define and token.kind == 'IDENT':
                if macro_start:
                    # Second identifier is the macro name.
                    macro_name = token
                else:
                    # This is the define keyword; the name comes next.
                    macro_start = True
            elif not in_define:
                # The directive ended without a macro name.  Forget the
                # define keyword so that the keyword of the next
                # directive is not mistaken for a macro name.
                macro_start = False
            # Drop tokens not in macro definitions.
            continue

        if not in_define:
            # End of the macro definition.
            yield _finish_definition(macro_name, macro_args, body,
                                     in_macro_args, error)
            macro_name = None
            macro_start = False
            macro_args = None
            in_macro_args = False
            error = None
            body = []
            continue

        if macro_start:
            # First token after the macro name.  Only an immediately
            # following '(' makes the macro function-like.
            macro_start = False
            if token.kind == 'PUNCTUATOR' and token.text == '(':
                macro_args = []
                in_macro_args = True
                continue

        if in_macro_args:
            if token.kind == 'IDENT':
                macro_args.append(token)
            elif token.kind == 'PUNCTUATOR':
                if token.text == '...':
                    # Variadic placeholder, recorded like an argument.
                    macro_args.append(token)
                elif token.text == ')':
                    in_macro_args = False
                elif token.text == ',':
                    pass  # Skip.  Not a full syntax check.
                elif error is None:
                    error = 'invalid punctuator in macro argument list: ' \
                        + repr(token.text)
            elif token.kind in _SKIPPED_KINDS:
                pass  # Whitespace and comments between arguments are fine.
            elif error is None:
                error = 'invalid {} token in macro argument list'.format(
                    token.kind)
            continue

        if token.kind not in _SKIPPED_KINDS:
            body.append(token)

    # Emit the macro in case the last line does not end with a newline.
    if macro_name is not None:
        yield _finish_definition(macro_name, macro_args, body,
                                 in_macro_args, error)


# Used to split UL etc. suffixes from numbers such as 123UL.
RE_SPLIT_INTEGER_SUFFIX = re.compile(r'([^ulUL]+)([ulUL]*)')

BINARY_OPERATORS = {
    '+': operator.add,
    '<<': operator.lshift,
}

# Use the general-purpose dict type if it is order-preserving.
if (sys.version_info[0], sys.version_info[1]) <= (3, 6):
    OrderedDict = collections.OrderedDict
else:
    OrderedDict = dict


def macro_eval(macro_defs, reporter):
    """Compute macro values.

    macro_defs is the output from macro_definitions.  reporter is an
    object that accepts reporter.error(line_number, message) and
    reporter.note(line_number, message) calls to report errors
    and error context invocations.

    The returned dict pairs the names of macros which are not
    function-like with their computed values: an int, a still-quoted
    string, or None for an empty expansion or an error.  Identifiers
    that were referenced from a macro body are cached in the same
    dict, so a reference that could not be evaluated (undefined name,
    function-like macro) shows up there with the value None.

    The current implementation is incomplete.  It is deliberately not
    entirely faithful to C, even in the implemented parts.  It checks
    that macro replacements follow certain syntactic rules even if
    they are never evaluated.

    """

    # Unevaluated macro definitions by name.  The first definition wins.
    definitions = OrderedDict()
    for md in macro_defs:
        if md.name in definitions:
            reporter.error(md.line, 'macro {} redefined'.format(md.name))
            reporter.note(definitions[md.name].line,
                          'location of previous definition')
        else:
            definitions[md.name] = md

    # String to value mappings for fully evaluated macros.
    evaluated = OrderedDict()

    # String to macro definitions during evaluation.  Nice error
    # reporting relies on deterministic iteration order.
    stack = OrderedDict()

    def eval_token(current, token):
        """Evaluate one macro token.

        Integers and strings are returned as such (the latter still
        quoted).  Identifiers are expanded.

        None indicates an empty expansion or an error.

        """

        if token.kind == 'PP_NUMBER':
            value = None
            # fullmatch rejects trailing garbage after the suffix.
            m = RE_SPLIT_INTEGER_SUFFIX.fullmatch(token.text)
            if m:
                try:
                    value = int(m.group(1), 0)
                except ValueError:
                    pass
            if value is None:
                reporter.error(token.line,
                    'invalid number {!r} in definition of {}'.format(
                        token.text, current.name))
            return value

        if token.kind == 'STRING':
            return token.text

        # Only plain single-character constants such as 'a' are handled.
        if token.kind == 'CHARCONST' and len(token.text) == 3:
            return ord(token.text[1])

        if token.kind == 'IDENT':
            name = token.text
            result = eval1(current, name)
            # Cache the outcome (even a failure) so that the same
            # problem is not reported again for later references.
            if name not in evaluated:
                evaluated[name] = result
            return result

        reporter.error(token.line,
            'unrecognized {!r} in definition of {}'.format(
                token.text, current.name))
        return None

    def eval1(current, name):
        """Evaluate one name.

        The name is looked up and the macro definition evaluated
        recursively if necessary.  The current argument is the macro
        definition being evaluated.

        None as a return value indicates an error or an empty
        expansion.

        """

        # Fast path if the value has already been evaluated.
        if name in evaluated:
            return evaluated[name]

        try:
            md = definitions[name]
        except KeyError:
            reporter.error(current.line,
                'reference to undefined identifier {} in definition of {}'
                .format(name, current.name))
            return None

        if md.name in stack:
            # Recursive macro definition.
            reporter.error(md.line,
                'macro definition {} refers to itself'.format(md.name))
            # Report the chain of macros that led back to md.
            for md1 in reversed(list(stack.values())):
                if md1 is md:
                    break
                reporter.note(md1.line,
                              'evaluated from {}'.format(md1.name))
            return None

        # Reject function-like macros before touching the stack, so
        # that no stale entry is left behind on this early return.
        if md.function:
            reporter.error(current.line,
                'attempt to evaluate function-like macro {}'.format(name))
            reporter.note(md.line, 'definition of {}'.format(md.name))
            return None

        stack[md.name] = md
        try:
            body = md.body
            if not body:
                # Empty expansion.
                return None

            # Remove surrounding ().
            had_parens = body[0].text == '(' and body[-1].text == ')'
            if had_parens:
                body = body[1:-1]

            if len(body) == 1:
                return eval_token(md, body[0])

            # Minimal expression evaluator for binary operators.
            if len(body) == 3 and body[1].text in BINARY_OPERATORS:
                op = body[1].text
                if not had_parens:
                    reporter.error(body[1].line,
                        'missing parentheses around {} expression'.format(op))
                    reporter.note(md.line,
                        'in definition of macro {}'.format(md.name))

                left = eval_token(md, body[0])
                right = eval_token(md, body[2])

                operands_ok = True
                for side, operand, value in (('left', body[0], left),
                                             ('right', body[2], right)):
                    if not isinstance(value, int):
                        # Report at the operand token; the evaluated
                        # value itself carries no line number.
                        reporter.error(operand.line,
                            '{} operand of {} is not an integer'.format(
                                side, op))
                        reporter.note(md.line,
                            'in definition of macro {}'.format(md.name))
                        operands_ok = False
                if not operands_ok:
                    return None
                return BINARY_OPERATORS[op](left, right)

            # Also reached for a body that is empty once the
            # surrounding parentheses have been removed.
            reporter.error(md.line,
                'uninterpretable macro token sequence: {}'.format(
                    ' '.join(md.body_lowered)))
            return None
        finally:
            del stack[md.name]

    # Start of main body of macro_eval.
    for md in definitions.values():
        name = md.name
        if name not in evaluated and not md.function:
            evaluated[name] = eval1(md, name)
    return evaluated