aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorFlorian Weimer <fweimer@redhat.com>2022-09-22 12:10:41 +0200
committerFlorian Weimer <fweimer@redhat.com>2022-09-22 12:10:41 +0200
commite6e6184bed490403811771fa527eb95b4ae53c7c (patch)
treed109a6b06b8f2a868cf5333ec86152199d13a5d2 /scripts
parentf40c7887d3cc9bb0b56576ed9edbe505ff8058c0 (diff)
downloadglibc-e6e6184bed490403811771fa527eb95b4ae53c7c.zip
glibc-e6e6184bed490403811771fa527eb95b4ae53c7c.tar.gz
glibc-e6e6184bed490403811771fa527eb95b4ae53c7c.tar.bz2
scripts: Enhance glibcpp to do basic macro processing
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
Diffstat (limited to 'scripts')
-rw-r--r--scripts/glibcpp.py317
1 files changed, 317 insertions, 0 deletions
diff --git a/scripts/glibcpp.py b/scripts/glibcpp.py
index b44c6a4..455459a 100644
--- a/scripts/glibcpp.py
+++ b/scripts/glibcpp.py
@@ -33,7 +33,9 @@ Accepts non-ASCII characters only within comments and strings.
"""
import collections
+import operator
import re
+import sys
# Caution: The order of the outermost alternation matters.
# STRING must be before BAD_STRING, CHARCONST before BAD_CHARCONST,
@@ -210,3 +212,318 @@ def tokenize_c(file_contents, reporter):
yield tok
pos = mo.end()
+
class MacroDefinition(collections.namedtuple('MacroDefinition',
                                             'name_token args body error')):
    """A preprocessor macro definition.

    name_token is the Token_ for the name.

    args is None for a macro that is not function-like.  Otherwise, it
    is a tuple that contains the macro argument name tokens.

    body is a tuple that contains the tokens that constitute the body
    of the macro definition (excluding whitespace).

    error is None if no error was detected, or otherwise a problem
    description associated with this macro definition.

    """

    # Without an empty __slots__, every instance of this subclass would
    # carry its own __dict__ on top of the tuple storage.
    __slots__ = ()

    @property
    def function(self):
        """Return true if the macro is function-like."""
        return self.args is not None

    @property
    def name(self):
        """Return the name of the macro being defined."""
        return self.name_token.text

    @property
    def line(self):
        """Return the line number of the macro definition."""
        return self.name_token.line

    @property
    def args_lowered(self):
        """Return the macro argument list as a list of strings.

        The result is None for a macro that is not function-like.

        """
        if self.args is None:
            return None
        return [token.text for token in self.args]

    @property
    def body_lowered(self):
        """Return the macro body as a list of strings."""
        return [token.text for token in self.body]
+
def macro_definitions(tokens):
    """A generator for C macro definitions among tokens.

    The generator yields MacroDefinition objects, one for each
    definition found, in the order in which the definitions end.

    tokens must be iterable, yielding Token_ objects.  Only the kind,
    text and context attributes of each token are inspected here.

    Problems found in a macro argument list do not raise an exception;
    the first one is recorded in the error field of the yielded
    MacroDefinition.

    """

    macro_name = None
    macro_start = False    # Set to false after macro name and one token.
    macro_args = None      # Set to a list during the macro argument sequence.
    in_macro_args = False  # True while processing macro identifier-list.
    error = None
    body = []

    for token in tokens:
        if token.context == 'define' and macro_name is None \
           and token.kind == 'IDENT':
            # Starting up macro processing.
            if macro_start:
                # Second identifier: this is the macro name.
                macro_name = token
            else:
                # First identifier of the directive.  The next
                # identifier is the name.
                macro_start = True
            continue

        if macro_name is None:
            # Drop tokens not in macro definitions.
            continue

        if token.context != 'define':
            # End of the macro definition.
            yield _finish_macro(macro_name, macro_args, body,
                                in_macro_args, error)
            # No longer in a macro definition.
            macro_name = None
            macro_start = False
            macro_args = None
            in_macro_args = False
            error = None
            body = []
            continue

        if macro_start:
            # First token after the macro name.  Only a '(' that
            # immediately follows the name starts an argument list; if
            # anything else (including whitespace) comes first, the
            # macro is not function-like.
            macro_start = False
            if token.kind == 'PUNCTUATOR' and token.text == '(':
                macro_args = []
                in_macro_args = True
                continue

        if in_macro_args:
            if token.kind == 'IDENT' \
               or (token.kind == 'PUNCTUATOR' and token.text == '...'):
                # Macro argument or ... placeholder.
                macro_args.append(token)
            elif token.kind == 'PUNCTUATOR':
                if token.text == ')':
                    macro_args = tuple(macro_args)
                    in_macro_args = False
                elif token.text == ',':
                    pass  # Skip.  Not a full syntax check.
                elif error is None:
                    error = 'invalid punctuator in macro argument list: ' \
                        + repr(token.text)
            elif token.kind in ('WHITESPACE', 'BLOCK_COMMENT'):
                pass  # Layout between arguments is not an error.
            elif error is None:
                error = 'invalid {} token in macro argument list'.format(
                    token.kind)
            continue

        # Same filtering as for the argument list: layout tokens are
        # not part of the body.
        if token.kind not in ('WHITESPACE', 'BLOCK_COMMENT'):
            body.append(token)

    # Emit the macro in case the last line does not end with a newline.
    if macro_name is not None:
        yield _finish_macro(macro_name, macro_args, body,
                            in_macro_args, error)


def _finish_macro(name_token, args, body, in_args, error):
    """Build the MacroDefinition for a definition that has just ended.

    in_args is true if the definition ended while its argument list
    was still open.  In that case an error is recorded (unless an
    earlier one is already present) and the partial argument list is
    converted to a tuple, so that args is never left as a list.

    """
    if in_args:
        if error is None:
            error = 'macro definition ends in macro argument list'
        args = tuple(args)
    return MacroDefinition(name_token, args, tuple(body), error)
+
# Used to split UL etc. suffixes from numbers such as 123UL.  The
# pattern is anchored at the end so that trailing characters after the
# suffix (as in 1u2) make the match fail instead of being silently
# ignored when the pattern is applied with match().
RE_SPLIT_INTEGER_SUFFIX = re.compile(r'([^uUlL]+)([uUlL]*)\Z')

# Operators supported by the minimal expression evaluator, keyed by
# their C spelling.
BINARY_OPERATORS = {
    '+': operator.add,
    '<<': operator.lshift,
}

# Use the general-purpose dict type if it is order-preserving.  The
# language guarantees insertion order for dict starting with Python 3.7.
if sys.version_info < (3, 7):
    OrderedDict = collections.OrderedDict
else:
    OrderedDict = dict
+
def macro_eval(macro_defs, reporter):
    """Compute macro values.

    macro_defs is the output from macro_definitions.  reporter is an
    object that accepts reporter.error(line_number, message) and
    reporter.note(line_number, message) calls to report errors
    and error context invocations.

    The returned dict pairs the names of macros which are not
    function-like with their computed values (an integer, a string
    that is still quoted, or None for an empty expansion or an error).
    Identifiers referenced from evaluated macro bodies are recorded in
    the dict as well, with None if they could not be evaluated.

    If a macro is defined more than once, an error is reported and
    only the first definition is used.

    The current implementation is incomplete.  It is deliberately not
    entirely faithful to C, even in the implemented parts.  It checks
    that macro replacements follow certain syntactic rules even if
    they are never evaluated.

    """

    # Unevaluated macro definitions by name.
    definitions = OrderedDict()
    for md in macro_defs:
        if md.name in definitions:
            reporter.error(md.line, 'macro {} redefined'.format(md.name))
            reporter.note(definitions[md.name].line,
                          'location of previous definition')
        else:
            definitions[md.name] = md

    # String to value mappings for fully evaluated macros.
    evaluated = OrderedDict()

    # String to macro definitions during evaluation.  Nice error
    # reporting relies on deterministic iteration order.
    stack = OrderedDict()

    def eval_token(current, token):
        """Evaluate one macro token.

        Integers and strings are returned as such (the latter still
        quoted).  Identifiers are expanded.

        None indicates an empty expansion or an error.

        """

        if token.kind == 'PP_NUMBER':
            value = None
            # The whole token must be digits plus suffix; a partial
            # match would silently drop trailing characters.
            m = RE_SPLIT_INTEGER_SUFFIX.fullmatch(token.text)
            if m:
                try:
                    value = int(m.group(1), 0)
                except ValueError:
                    pass
            if value is None:
                reporter.error(token.line,
                    'invalid number {!r} in definition of {}'.format(
                        token.text, current.name))
            return value

        if token.kind == 'STRING':
            return token.text

        # Only plain single-character constants such as 'a' are handled.
        if token.kind == 'CHARCONST' and len(token.text) == 3:
            return ord(token.text[1])

        if token.kind == 'IDENT':
            name = token.text
            result = eval1(current, name)
            # Remember the outcome (even None) so that a failing name
            # is reported only once.
            if name not in evaluated:
                evaluated[name] = result
            return result

        reporter.error(token.line,
            'unrecognized {!r} in definition of {}'.format(
                token.text, current.name))
        return None

    def eval1(current, name):
        """Evaluate one name.

        The name is looked up and the macro definition evaluated
        recursively if necessary.  The current argument is the macro
        definition being evaluated.

        None as a return value indicates an empty expansion or an
        error.

        """

        # Fast path if the value has already been evaluated.
        if name in evaluated:
            return evaluated[name]

        try:
            md = definitions[name]
        except KeyError:
            reporter.error(current.line,
                'reference to undefined identifier {} in definition of {}'
                .format(name, current.name))
            return None

        if md.name in stack:
            # Recursive macro definition.
            md = stack[name]
            reporter.error(md.line,
                'macro definition {} refers to itself'.format(md.name))
            # Report the chain of macros leading back to md, innermost
            # first.
            for md1 in reversed(list(stack.values())):
                if md1 is md:
                    break
                reporter.note(md1.line,
                              'evaluated from {}'.format(md1.name))
            return None

        # Reject function-like macros before touching the evaluation
        # stack, so that this early return cannot leave a stale entry
        # behind.
        if md.function:
            reporter.error(current.line,
                'attempt to evaluate function-like macro {}'.format(name))
            reporter.note(md.line, 'definition of {}'.format(md.name))
            return None

        stack[md.name] = md
        try:
            body = md.body
            if not body:
                # Empty expansion.
                return None

            # Remove surrounding ().
            if body[0].text == '(' and body[-1].text == ')':
                body = body[1:-1]
                had_parens = True
            else:
                had_parens = False

            if len(body) == 1:
                return eval_token(md, body[0])

            # Minimal expression evaluator for binary operators.  The
            # length is checked first because body may be empty here
            # (a definition consisting of just "()").
            if len(body) == 3 and body[1].text in BINARY_OPERATORS:
                op = body[1].text
                if not had_parens:
                    reporter.error(body[1].line,
                        'missing parentheses around {} expression'.format(op))
                    reporter.note(md.line,
                        'in definition of macro {}'.format(md.name))

                left = eval_token(md, body[0])
                right = eval_token(md, body[2])

                # left and right are plain values, so locations for
                # diagnostics have to come from the operand tokens.
                operands_ok = True
                for side, value, operand in (('left', left, body[0]),
                                             ('right', right, body[2])):
                    if not isinstance(value, int):
                        reporter.error(operand.line,
                            '{} operand of {} is not an integer'.format(
                                side, op))
                        reporter.note(md.line,
                            'in definition of macro {}'.format(md.name))
                        operands_ok = False
                if not operands_ok:
                    return None
                return BINARY_OPERATORS[op](left, right)

            reporter.error(md.line,
                'uninterpretable macro token sequence: {}'.format(
                    ' '.join(md.body_lowered)))
            return None
        finally:
            del stack[md.name]

    # Start of main body of macro_eval.
    for md in definitions.values():
        name = md.name
        if name not in evaluated and not md.function:
            evaluated[name] = eval1(md, name)
    return evaluated