From 9b5acc563367149c27bc7a4b464f98bf06eeb59a Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Thu, 25 May 2023 18:04:05 -0700
Subject: decodetree: Add --test-for-error

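Add a --test-for-error option that inverts the exit code: exit 0 when
an error is diagnosed and 1 when the input is processed successfully.
This is for use with the testsuite.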

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 scripts/decodetree.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'scripts')
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index a03dc6b..3f9f687 100644
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -35,6 +35,7 @@ arguments = {}
 formats = {}
 allpatterns = []
 anyextern = False
+testforerror = False
 
 translate_prefix = 'trans'
 translate_scope = 'static '
@@ -71,7 +72,7 @@ def error_with_file(file, lineno, *args):
     if output_file and output_fd:
         output_fd.close()
         os.remove(output_file)
-    exit(1)
+    exit(0 if testforerror else 1)
 # end error_with_file
 
 
@@ -1286,11 +1287,12 @@ def main():
     global bitop_width
     global variablewidth
     global anyextern
+    global testforerror
 
     decode_scope = 'static '
 
     long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
-                 'static-decode=', 'varinsnwidth=']
+                 'static-decode=', 'varinsnwidth=', 'test-for-error']
     try:
         (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
     except getopt.GetoptError as err:
@@ -1319,6 +1321,8 @@ def main():
                 bitop_width = 64
             elif insnwidth != 32:
                 error(0, 'cannot handle insns of width', insnwidth)
+        elif o == '--test-for-error':
+            testforerror = True
         else:
             assert False, 'unhandled option'
 
@@ -1417,6 +1421,7 @@ def main():
 
     if output_file:
         output_fd.close()
+    exit(1 if testforerror else 0)
 # end main
 
 
-- 
cgit v1.1


From 2fd2eb5a247e641adc36a67318a791b417afea46 Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Thu, 25 May 2023 18:45:43 -0700
Subject: decodetree: Fix recursion in prop_format and build_tree

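Fix two copy-paste errors in the walk over the parse tree:
MultiPattern.prop_format() called build_tree() on each sub-pattern, and
ExcMultiPattern.build_tree() called super().prop_format().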

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 scripts/decodetree.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'scripts')

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index 3f9f687..e2640cc 100644
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -474,7 +474,7 @@ class MultiPattern(General):
 
     def prop_format(self):
         for p in self.pats:
-            p.build_tree()
+            p.prop_format()
 
     def prop_width(self):
         width = None
@@ -624,7 +624,7 @@ class ExcMultiPattern(MultiPattern):
         return t
 
     def build_tree(self):
-        super().prop_format()
+        super().build_tree()
         self.tree = self.__build_tree(self.pats, self.fixedbits,
                                       self.fixedmask)
 
-- 
cgit v1.1


From f26044717679331ba31ba9bb911e059a13a49599 Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Thu, 25 May 2023 18:50:58 -0700
Subject: decodetree: Diagnose empty pattern group

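Diagnose an empty pattern group with an 'empty pattern group' error
instead of crashing. Test err_pattern_group_empty.decode previously
failed with this exception: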

Traceback (most recent call last):
  File "./scripts/decodetree.py", line 1424, in <module> main()
  File "./scripts/decodetree.py", line 1342, in main toppat.build_tree()
  File "./scripts/decodetree.py", line 627, in build_tree
    self.tree = self.__build_tree(self.pats, self.fixedbits,
  File "./scripts/decodetree.py", line 607, in __build_tree
    fb = i.fixedbits & innermask
TypeError: unsupported operand type(s) for &: 'NoneType' and 'int'

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 scripts/decodetree.py | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'scripts')

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index e2640cc..e4ef0a0 100644
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -506,6 +506,12 @@ class IncMultiPattern(MultiPattern):
                 output(ind, '}\n')
             else:
                 p.output_code(i, extracted, p.fixedbits, p.fixedmask)
+
+    def build_tree(self):
+        if not self.pats:
+            error_with_file(self.file, self.lineno, 'empty pattern group')
+        super().build_tree()
+
 #end IncMultiPattern
 
 
-- 
cgit v1.1


From 036cc75ca0bff26bfe75dc721e641d812cad3c09 Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Fri, 26 May 2023 10:22:51 -0700
Subject: decodetree: Do not remove output_file from /dev

On error, do not remove an output file whose path starts with /dev,
and do not report any PermissionError raised by the removal.
The primary purpose is to allow testing with -o /dev/null.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 scripts/decodetree.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'scripts')

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index e4ef0a0..a9a0cd0 100644
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -71,7 +71,12 @@ def error_with_file(file, lineno, *args):
 
     if output_file and output_fd:
         output_fd.close()
-        os.remove(output_file)
+        # Do not try to remove e.g. -o /dev/null
+        if not output_file.startswith("/dev"):
+            try:
+                os.remove(output_file)
+            except PermissionError:
+                pass
     exit(0 if testforerror else 1)
 # end error_with_file
 
-- 
cgit v1.1


From aeac22ba1e91a40d1d831cb02a1935391e67c7e2 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 23 May 2023 13:04:44 +0100
Subject: scripts/decodetree: Pass lvalue-formatter function to str_extract()

To support referring to other named fields in field definitions, we
need to pass the str_extract() method a function which tells it how
to emit the code for a previously initialized named field.  (In
Pattern::output_code() the other field will be "u.f_foo.field", and
in Format::output_extract() it is "a->field".)

Refactor the two callsites that currently do "output code to
initialize each field", and have them pass a lambda that defines how
to format the lvalue in each case.  This is then used both in
emitting the LHS of the assignment and also passed down to
str_extract() as a new argument (unused at the moment, but will be
used in the following patch).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230523120447.728365-4-peter.maydell@linaro.org>
---
 scripts/decodetree.py | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

(limited to 'scripts')

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index a9a0cd0..73d569c 100644
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -211,7 +211,7 @@ class Field:
             s = ''
         return str(self.pos) + ':' + s + str(self.len)
 
-    def str_extract(self):
+    def str_extract(self, lvalue_formatter):
         global bitop_width
         s = 's' if self.sign else ''
         return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
@@ -234,12 +234,12 @@ class MultiField:
     def __str__(self):
         return str(self.subs)
 
-    def str_extract(self):
+    def str_extract(self, lvalue_formatter):
         global bitop_width
         ret = '0'
         pos = 0
         for f in reversed(self.subs):
-            ext = f.str_extract()
+            ext = f.str_extract(lvalue_formatter)
             if pos == 0:
                 ret = ext
             else:
@@ -270,7 +270,7 @@ class ConstField:
     def __str__(self):
         return str(self.value)
 
-    def str_extract(self):
+    def str_extract(self, lvalue_formatter):
         return str(self.value)
 
     def __cmp__(self, other):
@@ -289,8 +289,9 @@ class FunctionField:
     def __str__(self):
         return self.func + '(' + str(self.base) + ')'
 
-    def str_extract(self):
-        return self.func + '(ctx, ' + self.base.str_extract() + ')'
+    def str_extract(self, lvalue_formatter):
+        return (self.func + '(ctx, '
+                + self.base.str_extract(lvalue_formatter) + ')')
 
     def __eq__(self, other):
         return self.func == other.func and self.base == other.base
@@ -310,7 +311,7 @@ class ParameterField:
     def __str__(self):
         return self.func
 
-    def str_extract(self):
+    def str_extract(self, lvalue_formatter):
         return self.func + '(ctx)'
 
     def __eq__(self, other):
@@ -363,6 +364,11 @@ class General:
 
     def str1(self, i):
         return str_indent(i) + self.__str__()
+
+    def output_fields(self, indent, lvalue_formatter):
+        for n, f in self.fields.items():
+            output(indent, lvalue_formatter(n), ' = ',
+                   f.str_extract(lvalue_formatter), ';\n')
 # end General
 
 
@@ -376,8 +382,7 @@ class Format(General):
     def output_extract(self):
         output('static void ', self.extract_name(), '(DisasContext *ctx, ',
                self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
-        for n, f in self.fields.items():
-            output('    a->', n, ' = ', f.str_extract(), ';\n')
+        self.output_fields(str_indent(4), lambda n: 'a->' + n)
         output('}\n\n')
 # end Format
 
@@ -401,8 +406,7 @@ class Pattern(General):
         if not extracted:
             output(ind, self.base.extract_name(),
                    '(ctx, &u.f_', arg, ', insn);\n')
-        for n, f in self.fields.items():
-            output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
+        self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
         output(ind, 'if (', translate_prefix, '_', self.name,
                '(ctx, &u.f_', arg, ')) return true;\n')
 
-- 
cgit v1.1


From 36d612448273d0c295f519d9df3b10208177487a Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 23 May 2023 13:04:45 +0100
Subject: scripts/decodetree: Implement a topological sort

To support named fields, we will need to be able to do a topological
sort (so that we ensure that we output the assignment to field A
before the assignment to field B if field B refers to field A by
name). The good news is that there is a tsort in the python standard
library; the bad news is that it was only added in Python 3.9.

To bridge the gap between our current minimum supported Python
version and 3.9, provide a local implementation that has the
same API as the stdlib version for the parts we care about.
In future when QEMU's minimum Python version requirement reaches
3.9 we can delete this code and replace it with an 'import' line.

The core of this implementation is based on
https://code.activestate.com/recipes/578272-topological-sort/
which is MIT-licensed.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Acked-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230523120447.728365-5-peter.maydell@linaro.org>
---
 scripts/decodetree.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

(limited to 'scripts')

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index 73d569c..db019a2 100644
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -54,6 +54,80 @@ re_fld_ident = '%[a-zA-Z0-9_]*'
 re_fmt_ident = '@[a-zA-Z0-9_]*'
 re_pat_ident = '[a-zA-Z0-9_]*'
 
+# Local implementation of a topological sort. We use the same API that
+# the Python graphlib does, so that when QEMU moves forward to a
+# baseline of Python 3.9 or newer this code can all be dropped and
+# replaced with:
+#    from graphlib import TopologicalSorter, CycleError
+#
+# https://docs.python.org/3.9/library/graphlib.html#graphlib.TopologicalSorter
+#
+# We only implement the parts of TopologicalSorter we care about:
+#  ts = TopologicalSorter(graph=None)
+#    create the sorter. graph is a dictionary whose keys are
+#    nodes and whose values are lists of the predecessors of that node.
+#    (That is, if graph contains "A" -> ["B", "C"] then we must output
+#    B and C before A.)
+#  ts.static_order()
+#    returns a list of all the nodes in sorted order, or raises CycleError
+#  CycleError
+#    exception raised if there are cycles in the graph. The second
+#    element in the args attribute is a list of nodes which form a
+#    cycle; the first and last element are the same, eg [a, b, c, a]
+#    (Our implementation lists all unsortable nodes, not in cycle order.)
+#
+# For our purposes we can assume that the data set is always small
+# (typically 10 nodes or fewer, with actual links in the graph very rare),
+# so we don't need to worry about efficiency of implementation.
+#
+# The core of this implementation is from
+# https://code.activestate.com/recipes/578272-topological-sort/
+# (but updated to Python 3), and is under the MIT license.
+
+class CycleError(ValueError):
+    """Subclass of ValueError raised if cycles exist in the graph"""
+    pass
+
+class TopologicalSorter:
+    """Topologically sort a graph"""
+    def __init__(self, graph=None):
+        self.graph = graph
+
+    def static_order(self):
+        # We do the sort right here, unlike the stdlib version
+        from functools import reduce
+        data = {}
+        r = []
+
+        if not self.graph:
+            return []
+
+        # This code wants the values in the dict to be specifically sets
+        for k, v in self.graph.items():
+            data[k] = set(v)
+
+        # Find items that appear only as dependencies (no key of their own).
+        extra_items_in_deps = (reduce(set.union, data.values())
+                               - set(data.keys()))
+        # Add empty dependencies where needed
+        data.update({item:{} for item in extra_items_in_deps})
+        while True:
+            ordered = set(item for item, dep in data.items() if not dep)
+            if not ordered:
+                break
+            r.extend(ordered)
+            data = {item: (dep - ordered)
+                    for item, dep in data.items()
+                        if item not in ordered}
+        if data:
+            # This doesn't give as nice results as the stdlib, which
+            # gives you the cycle by listing the nodes in order. Here
+            # we only know the nodes in the cycle but not their order.
+            raise CycleError(f'nodes are in a cycle', list(data.keys()))
+
+        return r
+# end TopologicalSorter
+
 def error_with_file(file, lineno, *args):
     """Print an error message from file:line and args and exit."""
     global output_file
-- 
cgit v1.1


From 7e6c28be2739c2286fe09702adef4740d9a1ed41 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 23 May 2023 13:04:46 +0100
Subject: scripts/decodetree: Implement named field support

Implement support for named fields, i.e.  where one field is defined
in terms of another, rather than directly in terms of bits extracted
from the instruction.

The new method referenced_fields() on all the Field classes returns a
list of fields that this field references.  This just passes through,
except for the new NamedField class.

We can then use referenced_fields() to:
 * construct a list of 'dangling references' for a format or
   pattern, which is the fields that the format/pattern uses but
   doesn't define itself
 * do a topological sort, so that we output "field = value"
   assignments in an order that means that we assign a field before
   we reference it in a subsequent assignment
 * check when we output the code for a pattern whether we need to
   fill in the format fields before or after the pattern fields, and
   do other error checking

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230523120447.728365-6-peter.maydell@linaro.org>
---
 scripts/decodetree.py | 145 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 139 insertions(+), 6 deletions(-)

(limited to 'scripts')

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index db019a2..13db585 100644
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -290,6 +290,9 @@ class Field:
         s = 's' if self.sign else ''
         return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})'
 
+    def referenced_fields(self):
+        return []
+
     def __eq__(self, other):
         return self.sign == other.sign and self.mask == other.mask
 
@@ -321,6 +324,12 @@ class MultiField:
             pos += f.len
         return ret
 
+    def referenced_fields(self):
+        l = []
+        for f in self.subs:
+            l.extend(f.referenced_fields())
+        return l
+
     def __ne__(self, other):
         if len(self.subs) != len(other.subs):
             return True
@@ -347,6 +356,9 @@ class ConstField:
     def str_extract(self, lvalue_formatter):
         return str(self.value)
 
+    def referenced_fields(self):
+        return []
+
     def __cmp__(self, other):
         return self.value - other.value
 # end ConstField
@@ -367,6 +379,9 @@ class FunctionField:
         return (self.func + '(ctx, '
                 + self.base.str_extract(lvalue_formatter) + ')')
 
+    def referenced_fields(self):
+        return self.base.referenced_fields()
+
     def __eq__(self, other):
         return self.func == other.func and self.base == other.base
 
@@ -388,6 +403,9 @@ class ParameterField:
     def str_extract(self, lvalue_formatter):
         return self.func + '(ctx)'
 
+    def referenced_fields(self):
+        return []
+
     def __eq__(self, other):
         return self.func == other.func
 
@@ -395,6 +413,32 @@ class ParameterField:
         return not self.__eq__(other)
 # end ParameterField
 
+class NamedField:
+    """Class representing a field already named in the pattern"""
+    def __init__(self, name, sign, len):
+        self.mask = 0
+        self.sign = sign
+        self.len = len
+        self.name = name
+
+    def __str__(self):
+        return self.name
+
+    def str_extract(self, lvalue_formatter):
+        global bitop_width
+        s = 's' if self.sign else ''
+        lvalue = lvalue_formatter(self.name)
+        return f'{s}extract{bitop_width}({lvalue}, 0, {self.len})'
+
+    def referenced_fields(self):
+        return [self.name]
+
+    def __eq__(self, other):
+        return self.name == other.name
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+# end NamedField
 
 class Arguments:
     """Class representing the extracted fields of a format"""
@@ -418,7 +462,6 @@ class Arguments:
             output('} ', self.struct_name(), ';\n\n')
 # end Arguments
 
-
 class General:
     """Common code between instruction formats and instruction patterns"""
     def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
@@ -432,6 +475,7 @@ class General:
         self.fieldmask = fldm
         self.fields = flds
         self.width = w
+        self.dangling = None
 
     def __str__(self):
         return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
@@ -439,10 +483,51 @@ class General:
     def str1(self, i):
         return str_indent(i) + self.__str__()
 
+    def dangling_references(self):
+        # Return a list of all named references which aren't satisfied
+        # directly by this format/pattern. This will be either:
+        #  * a format referring to a field which is specified by the
+        #    pattern(s) using it
+        #  * a pattern referring to a field which is specified by the
+        #    format it uses
+        #  * a user error (referring to a field that doesn't exist at all)
+        if self.dangling is None:
+            # Compute this once and cache the answer
+            dangling = []
+            for n, f in self.fields.items():
+                for r in f.referenced_fields():
+                    if r not in self.fields:
+                        dangling.append(r)
+            self.dangling = dangling
+        return self.dangling
+
     def output_fields(self, indent, lvalue_formatter):
+        # We use a topological sort to ensure that any use of NamedField
+        # comes after the initialization of the field it is referencing.
+        graph = {}
         for n, f in self.fields.items():
-            output(indent, lvalue_formatter(n), ' = ',
-                   f.str_extract(lvalue_formatter), ';\n')
+            refs = f.referenced_fields()
+            graph[n] = refs
+
+        try:
+            ts = TopologicalSorter(graph)
+            for n in ts.static_order():
+                # We only want to emit assignments for the keys
+                # in our fields list, not for anything that ends up
+                # in the tsort graph only because it was referenced as
+                # a NamedField.
+                try:
+                    f = self.fields[n]
+                    output(indent, lvalue_formatter(n), ' = ',
+                           f.str_extract(lvalue_formatter), ';\n')
+                except KeyError:
+                    pass
+        except CycleError as e:
+            # The second element of args lists the nodes that could not
+            # be sorted. Our local TopologicalSorter does not give them in
+            # cycle order, so the ' => ' chain is only a hint to the user.
+            cycle = ' => '.join(e.args[1])
+            error(self.lineno, 'field definitions form a cycle: ' + cycle)
 # end General
 
 
@@ -477,10 +562,36 @@ class Pattern(General):
         ind = str_indent(i)
         arg = self.base.base.name
         output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
+        # We might have named references in the format that refer to fields
+        # in the pattern, or named references in the pattern that refer
+        # to fields in the format. This affects whether we extract the fields
+        # for the format before or after the ones for the pattern.
+        # For simplicity we don't allow cross references in both directions.
+        # This is also where we catch the syntax error of referring to
+        # a nonexistent field.
+        fmt_refs = self.base.dangling_references()
+        for r in fmt_refs:
+            if r not in self.fields:
+                error(self.lineno, f'format refers to undefined field {r}')
+        pat_refs = self.dangling_references()
+        for r in pat_refs:
+            if r not in self.base.fields:
+                error(self.lineno, f'pattern refers to undefined field {r}')
+        if pat_refs and fmt_refs:
+            error(self.lineno, ('pattern that uses fields defined in format '
+                                'cannot use format that uses fields defined '
+                                'in pattern'))
+        if fmt_refs:
+            # pattern fields first
+            self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
+            assert not extracted, "dangling fmt refs but it was already extracted"
         if not extracted:
             output(ind, self.base.extract_name(),
                    '(ctx, &u.f_', arg, ', insn);\n')
-        self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
+        if not fmt_refs:
+            # pattern fields last
+            self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n)
+
         output(ind, 'if (', translate_prefix, '_', self.name,
                '(ctx, &u.f_', arg, ')) return true;\n')
 
@@ -626,8 +737,10 @@ class Tree:
         ind = str_indent(i)
 
         # If we identified all nodes below have the same format,
-        # extract the fields now.
-        if not extracted and self.base:
+        # extract the fields now. But don't do it if the format relies
+        # on named fields from the insn pattern, as those won't have
+        # been initialised at this point.
+        if not extracted and self.base and not self.base.dangling_references():
             output(ind, self.base.extract_name(),
                    '(ctx, &u.f_', self.base.base.name, ', insn);\n')
             extracted = True
@@ -749,6 +862,7 @@ def parse_field(lineno, name, toks):
     """Parse one instruction field from TOKS at LINENO"""
     global fields
     global insnwidth
+    global re_C_ident
 
     # A "simple" field will have only one entry;
     # a "multifield" will have several.
@@ -763,6 +877,25 @@ def parse_field(lineno, name, toks):
             func = func[1]
             continue
 
+        if re.fullmatch(re_C_ident + ':s[0-9]+', t):
+            # Signed named field
+            subtoks = t.split(':')
+            n = subtoks[0]
+            le = int(subtoks[1])
+            f = NamedField(n, True, le)
+            subs.append(f)
+            width += le
+            continue
+        if re.fullmatch(re_C_ident + ':[0-9]+', t):
+            # Unsigned named field
+            subtoks = t.split(':')
+            n = subtoks[0]
+            le = int(subtoks[1])
+            f = NamedField(n, False, le)
+            subs.append(f)
+            width += le
+            continue
+
         if re.fullmatch('[0-9]+:s[0-9]+', t):
             # Signed field extract
             subtoks = t.split(':s')
-- 
cgit v1.1