From 55fd189205493890a10245a6828b8f282ff518c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <f4bug@amsat.org>
Date: Sat, 10 Nov 2018 22:13:12 +0100
Subject: MAINTAINERS: Add scripts/decodetree.py to the TCG section
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-Id: <20181110211313.6922-2-f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index d326756..3426d33 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -117,6 +117,7 @@ F: cpus.c
 F: exec.c
 F: accel/tcg/
 F: accel/stubs/tcg-stub.c
+F: scripts/decodetree.py
 F: include/exec/cpu*.h
 F: include/exec/exec-all.h
 F: include/exec/helper*.h
-- 
cgit v1.1


From 3fdbf5d6794db253b0a5173204bbbcc8a17bc9d1 Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Sat, 23 Feb 2019 13:00:10 -0800
Subject: decodetree: Move documentation to docs/devel/decodetree.rst

One great big block comment isn't the best way to document
the syntax of a language.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 MAINTAINERS               |   1 +
 docs/devel/decodetree.rst | 156 ++++++++++++++++++++++++++++++++++++++++++++++
 docs/devel/index.rst      |   2 +-
 scripts/decodetree.py     | 134 +--------------------------------------
 4 files changed, 159 insertions(+), 134 deletions(-)
 create mode 100644 docs/devel/decodetree.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index 3426d33..564b8db 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -118,6 +118,7 @@ F: exec.c
 F: accel/tcg/
 F: accel/stubs/tcg-stub.c
 F: scripts/decodetree.py
+F: docs/devel/decodetree.rst
 F: include/exec/cpu*.h
 F: include/exec/exec-all.h
 F: include/exec/helper*.h
diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst
new file mode 100644
index 0000000..d9be30b
--- /dev/null
+++ b/docs/devel/decodetree.rst
@@ -0,0 +1,156 @@
+========================
+Decodetree Specification
+========================
+
+A *decodetree* is built from instruction *patterns*.  A pattern may
+represent a single architectural instruction or a group of same, depending
+on what is convenient for further processing.
+
+Each pattern has both *fixedbits* and *fixedmask*, the combination of which
+describes the condition under which the pattern is matched::
+
+  (insn & fixedmask) == fixedbits
+
+Each pattern may have *fields*, which are extracted from the insn and
+passed along to the translator.  Examples of such are registers,
+immediates, and sub-opcodes.
+
+In support of patterns, one may declare *fields*, *argument sets*, and
+*formats*, each of which may be re-used to simplify further definitions.
+
+Fields
+======
+
+Syntax::
+
+  field_def     := '%' identifier ( unnamed_field )+ ( !function=identifier )?
+  unnamed_field := number ':' ( 's' ) number
+
+For *unnamed_field*, the first number is the least-significant bit position
+of the field and the second number is the length of the field.  If the 's' is
+present, the field is considered signed.  If multiple ``unnamed_fields`` are
+present, they are concatenated.  In this way one can define disjoint fields.
+
+If ``!function`` is specified, the concatenated result is passed through the
+named function, taking and returning an integral value.
+
+FIXME: the fields of the structure into which this result will be stored
+is restricted to ``int``.  Which means that we cannot expand 64-bit items.
+
+Field examples:
+
++---------------------------+---------------------------------------------+
+| Input                     | Generated code                              |
++===========================+=============================================+
+| %disp   0:s16             | sextract(i, 0, 16)                          |
++---------------------------+---------------------------------------------+
+| %imm9   16:6 10:3         | extract(i, 16, 6) << 3 | extract(i, 10, 3)  |
++---------------------------+---------------------------------------------+
+| %disp12 0:s1 1:1 2:10     | sextract(i, 0, 1) << 11 |                   |
+|                           |    extract(i, 1, 1) << 10 |                 |
+|                           |    extract(i, 2, 10)                        |
++---------------------------+---------------------------------------------+
+| %shimm8 5:s8 13:1         | expand_shimm8(sextract(i, 5, 8) << 1 |      |
+|   !function=expand_shimm8 |               extract(i, 13, 1))            |
++---------------------------+---------------------------------------------+
+
+Argument Sets
+=============
+
+Syntax::
+
+  args_def    := '&' identifier ( args_elt )+ ( !extern )?
+  args_elt    := identifier
+
+Each *args_elt* defines an argument within the argument set.
+Each argument set will be rendered as a C structure "arg_$name"
+with each of the fields being one of the member arguments.
+
+If ``!extern`` is specified, the backing structure is assumed
+to have been already declared, typically via a second decoder.
+
+Argument set examples::
+
+  &reg3       ra rb rc
+  &loadstore  reg base offset
+
+
+Formats
+=======
+
+Syntax::
+
+  fmt_def      := '@' identifier ( fmt_elt )+
+  fmt_elt      := fixedbit_elt | field_elt | field_ref | args_ref
+  fixedbit_elt := [01.-]+
+  field_elt    := identifier ':' 's'? number
+  field_ref    := '%' identifier | identifier '=' '%' identifier
+  args_ref     := '&' identifier
+
+Defining a format is a handy way to avoid replicating groups of fields
+across many instruction patterns.
+
+A *fixedbit_elt* describes a contiguous sequence of bits that must
+be 1, 0, or don't care.  The difference between '.' and '-'
+is that '.' means that the bit will be covered with a field or a
+final 0 or 1 from the pattern, and '-' means that the bit is really
+ignored by the cpu and will not be specified.
+
+A *field_elt* describes a simple field only given a width; the position of
+the field is implied by its position with respect to other *fixedbit_elt*
+and *field_elt*.
+
+If any *fixedbit_elt* or *field_elt* appear, then all bits must be defined.
+Padding with a *fixedbit_elt* of all '.' is an easy way to accomplish that.
+
+A *field_ref* incorporates a field by reference.  This is the only way to
+add a complex field to a format.  A field may be renamed in the process
+via assignment to another identifier.  This is intended to allow the
+same argument set be used with disjoint named fields.
+
+A single *args_ref* may specify an argument set to use for the format.
+The set of fields in the format must be a subset of the arguments in
+the argument set.  If an argument set is not specified, one will be
+inferred from the set of fields.
+
+It is recommended, but not required, that all *field_ref* and *args_ref*
+appear at the end of the line, not interleaving with *fixedbit_elf* or
+*field_elt*.
+
+Format examples::
+
+  @opr    ...... ra:5 rb:5 ... 0 ....... rc:5
+  @opi    ...... ra:5 lit:8    1 ....... rc:5
+
+Patterns
+========
+
+Syntax::
+
+  pat_def      := identifier ( pat_elt )+
+  pat_elt      := fixedbit_elt | field_elt | field_ref | args_ref | fmt_ref | const_elt
+  fmt_ref      := '@' identifier
+  const_elt    := identifier '=' number
+
+The *fixedbit_elt* and *field_elt* specifiers are unchanged from formats.
+A pattern that does not specify a named format will have one inferred
+from a referenced argument set (if present) and the set of fields.
+
+A *const_elt* allows a argument to be set to a constant value.  This may
+come in handy when fields overlap between patterns and one has to
+include the values in the *fixedbit_elt* instead.
+
+The decoder will call a translator function for each pattern matched.
+
+Pattern examples::
+
+  addl_r   010000 ..... ..... .... 0000000 ..... @opr
+  addl_i   010000 ..... ..... .... 0000000 ..... @opi
+
+which will, in part, invoke::
+
+  trans_addl_r(ctx, &arg_opr, insn)
+
+and::
+
+  trans_addl_i(ctx, &arg_opi, insn)
diff --git a/docs/devel/index.rst b/docs/devel/index.rst
index 6b11e49..ebbab63 100644
--- a/docs/devel/index.rst
+++ b/docs/devel/index.rst
@@ -19,4 +19,4 @@ Contents:
    migration
    stable-process
    testing
-
+   decodetree
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index e342d27..33e32ee 100755
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -17,139 +17,7 @@
 
 #
 # Generate a decoding tree from a specification file.
-#
-# The tree is built from instruction "patterns".  A pattern may represent
-# a single architectural instruction or a group of same, depending on what
-# is convenient for further processing.
-#
-# Each pattern has "fixedbits" & "fixedmask", the combination of which
-# describes the condition under which the pattern is matched:
-#
-#   (insn & fixedmask) == fixedbits
-#
-# Each pattern may have "fields", which are extracted from the insn and
-# passed along to the translator.  Examples of such are registers,
-# immediates, and sub-opcodes.
-#
-# In support of patterns, one may declare fields, argument sets, and
-# formats, each of which may be re-used to simplify further definitions.
-#
-# *** Field syntax:
-#
-# field_def     := '%' identifier ( unnamed_field )+ ( !function=identifier )?
-# unnamed_field := number ':' ( 's' ) number
-#
-# For unnamed_field, the first number is the least-significant bit position of
-# the field and the second number is the length of the field.  If the 's' is
-# present, the field is considered signed.  If multiple unnamed_fields are
-# present, they are concatenated.  In this way one can define disjoint fields.
-#
-# If !function is specified, the concatenated result is passed through the
-# named function, taking and returning an integral value.
-#
-# FIXME: the fields of the structure into which this result will be stored
-# is restricted to "int".  Which means that we cannot expand 64-bit items.
-#
-# Field examples:
-#
-#   %disp   0:s16          -- sextract(i, 0, 16)
-#   %imm9   16:6 10:3      -- extract(i, 16, 6) << 3 | extract(i, 10, 3)
-#   %disp12 0:s1 1:1 2:10  -- sextract(i, 0, 1) << 11
-#                             | extract(i, 1, 1) << 10
-#                             | extract(i, 2, 10)
-#   %shimm8 5:s8 13:1 !function=expand_shimm8
-#                          -- expand_shimm8(sextract(i, 5, 8) << 1
-#                                           | extract(i, 13, 1))
-#
-# *** Argument set syntax:
-#
-# args_def    := '&' identifier ( args_elt )+ ( !extern )?
-# args_elt    := identifier
-#
-# Each args_elt defines an argument within the argument set.
-# Each argument set will be rendered as a C structure "arg_$name"
-# with each of the fields being one of the member arguments.
-#
-# If !extern is specified, the backing structure is assumed to
-# have been already declared, typically via a second decoder.
-#
-# Argument set examples:
-#
-#   &reg3       ra rb rc
-#   &loadstore  reg base offset
-#
-# *** Format syntax:
-#
-# fmt_def      := '@' identifier ( fmt_elt )+
-# fmt_elt      := fixedbit_elt | field_elt | field_ref | args_ref
-# fixedbit_elt := [01.-]+
-# field_elt    := identifier ':' 's'? number
-# field_ref    := '%' identifier | identifier '=' '%' identifier
-# args_ref     := '&' identifier
-#
-# Defining a format is a handy way to avoid replicating groups of fields
-# across many instruction patterns.
-#
-# A fixedbit_elt describes a contiguous sequence of bits that must
-# be 1, 0, [.-] for don't care.  The difference between '.' and '-'
-# is that '.' means that the bit will be covered with a field or a
-# final [01] from the pattern, and '-' means that the bit is really
-# ignored by the cpu and will not be specified.
-#
-# A field_elt describes a simple field only given a width; the position of
-# the field is implied by its position with respect to other fixedbit_elt
-# and field_elt.
-#
-# If any fixedbit_elt or field_elt appear then all bits must be defined.
-# Padding with a fixedbit_elt of all '.' is an easy way to accomplish that.
-#
-# A field_ref incorporates a field by reference.  This is the only way to
-# add a complex field to a format.  A field may be renamed in the process
-# via assignment to another identifier.  This is intended to allow the
-# same argument set be used with disjoint named fields.
-#
-# A single args_ref may specify an argument set to use for the format.
-# The set of fields in the format must be a subset of the arguments in
-# the argument set.  If an argument set is not specified, one will be
-# inferred from the set of fields.
-#
-# It is recommended, but not required, that all field_ref and args_ref
-# appear at the end of the line, not interleaving with fixedbit_elf or
-# field_elt.
-#
-# Format examples:
-#
-#   @opr    ...... ra:5 rb:5 ... 0 ....... rc:5
-#   @opi    ...... ra:5 lit:8    1 ....... rc:5
-#
-# *** Pattern syntax:
-#
-# pat_def      := identifier ( pat_elt )+
-# pat_elt      := fixedbit_elt | field_elt | field_ref
-#               | args_ref | fmt_ref | const_elt
-# fmt_ref      := '@' identifier
-# const_elt    := identifier '=' number
-#
-# The fixedbit_elt and field_elt specifiers are unchanged from formats.
-# A pattern that does not specify a named format will have one inferred
-# from a referenced argument set (if present) and the set of fields.
-#
-# A const_elt allows a argument to be set to a constant value.  This may
-# come in handy when fields overlap between patterns and one has to
-# include the values in the fixedbit_elt instead.
-#
-# The decoder will call a translator function for each pattern matched.
-#
-# Pattern examples:
-#
-#   addl_r   010000 ..... ..... .... 0000000 ..... @opr
-#   addl_i   010000 ..... ..... .... 0000000 ..... @opi
-#
-# which will, in part, invoke
-#
-#   trans_addl_r(ctx, &arg_opr, insn)
-# and
-#   trans_addl_i(ctx, &arg_opi, insn)
+# See the syntax and semantics in docs/devel/decodetree.rst.
 #
 
 import os
-- 
cgit v1.1


From 5d53b0f5d35248894bf7c223689e95600b88434e Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Wed, 27 Feb 2019 18:34:38 -0800
Subject: decodetree: Document the usefulness of argument sets

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 docs/devel/decodetree.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst
index d9be30b..62cb7f6 100644
--- a/docs/devel/decodetree.rst
+++ b/docs/devel/decodetree.rst
@@ -69,6 +69,13 @@ with each of the fields being one of the member arguments.
 If ``!extern`` is specified, the backing structure is assumed
 to have been already declared, typically via a second decoder.
 
+Argument sets are useful when one wants to define helper functions
+for the translator functions that can perform operations on a common
+set of arguments.  This can ensure, for instance, that the ``AND``
+pattern and the ``OR`` pattern put their operands into the same named
+structure, so that a common ``gen_logic_insn`` may be able to handle
+the operations common between the two.
+
 Argument set examples::
 
   &reg3       ra rb rc
-- 
cgit v1.1


From 9b3186e38f00ae0cba36c096e3654f916699f336 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <f4bug@amsat.org>
Date: Sun, 16 Dec 2018 20:07:38 -0800
Subject: decodetree: Ensure build_tree does not include values outside
 insnmask
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reproduced with "scripts/decodetree.py /dev/null".

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 scripts/decodetree.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index 33e32ee..e26d825 100755
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -784,7 +784,7 @@ class Tree:
 
 def build_tree(pats, outerbits, outermask):
     # Find the intersection of all remaining fixedmask.
-    innermask = ~outermask
+    innermask = ~outermask & insnmask
     for i in pats:
         innermask &= i.fixedmask
 
-- 
cgit v1.1


From eb6b87fac70dd62e3f1286703db20c012e7a9611 Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Sat, 23 Feb 2019 08:57:46 -0800
Subject: decodetree: Do not unconditionaly return from Pattern.output_code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As a consequence, the 'return false' gets pushed up one level.

This will allow us to perform some other action when the
translator returns failure.

Tested-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 scripts/decodetree.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index e26d825..cc5fa1a 100755
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -348,8 +348,8 @@ class Pattern(General):
             output(ind, self.base.extract_name(), '(&u.f_', arg, ', insn);\n')
         for n, f in self.fields.items():
             output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
-        output(ind, 'return ', translate_prefix, '_', self.name,
-               '(ctx, &u.f_', arg, ');\n')
+        output(ind, 'if (', translate_prefix, '_', self.name,
+               '(ctx, &u.f_', arg, ')) return true;\n')
 # end Pattern
 
 
@@ -777,8 +777,8 @@ class Tree:
             output(ind, '    /* ',
                    str_match_bits(innerbits, innermask), ' */\n')
             s.output_code(i + 4, extracted, innerbits, innermask)
+            output(ind, '    return false;\n')
         output(ind, '}\n')
-        output(ind, 'return false;\n')
 # end Tree
 
 
@@ -932,6 +932,7 @@ def main():
     output(i4, '} u;\n\n')
 
     t.output_code(4, False, 0, 0)
+    output(i4, 'return false;\n')
 
     output('}\n')
 
-- 
cgit v1.1


From 0eff2df4a2ce677230119440f7eb057acffad5eb Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Sat, 23 Feb 2019 11:35:36 -0800
Subject: decodetree: Allow grouping of overlapping patterns

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 docs/devel/decodetree.rst                      |  58 +++++++++
 scripts/decodetree.py                          | 165 +++++++++++++++++++++----
 tests/decode/err_pattern_group_overlap1.decode |   6 +
 3 files changed, 207 insertions(+), 22 deletions(-)
 create mode 100644 tests/decode/err_pattern_group_overlap1.decode

diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst
index 62cb7f6..44ac621 100644
--- a/docs/devel/decodetree.rst
+++ b/docs/devel/decodetree.rst
@@ -161,3 +161,61 @@ which will, in part, invoke::
 and::
 
   trans_addl_i(ctx, &arg_opi, insn)
+
+Pattern Groups
+==============
+
+Syntax::
+
+  group    := '{' ( pat_def | group )+ '}'
+
+A *group* begins with a lone open-brace, with all subsequent lines
+indented two spaces, and ending with a lone close-brace.  Groups
+may be nested, increasing the required indentation of the lines
+within the nested group to two spaces per nesting level.
+
+Unlike ungrouped patterns, grouped patterns are allowed to overlap.
+Conflicts are resolved by selecting the patterns in order.  If all
+of the fixedbits for a pattern match, its translate function will
+be called.  If the translate function returns false, then subsequent
+patterns within the group will be matched.
+
+The following example from PA-RISC shows specialization of the *or*
+instruction::
+
+  {
+    {
+      nop   000010 ----- ----- 0000 001001 0 00000
+      copy  000010 00000 r1:5  0000 001001 0 rt:5
+    }
+    or      000010 rt2:5 r1:5  cf:4 001001 0 rt:5
+  }
+
+When the *cf* field is zero, the instruction has no side effects,
+and may be specialized.  When the *rt* field is zero, the output
+is discarded and so the instruction has no effect.  When the *rt2*
+field is zero, the operation is ``reg[rt] | 0`` and so encodes
+the canonical register copy operation.
+
+The output from the generator might look like::
+
+  switch (insn & 0xfc000fe0) {
+  case 0x08000240:
+    /* 000010.. ........ ....0010 010..... */
+    if ((insn & 0x0000f000) == 0x00000000) {
+        /* 000010.. ........ 00000010 010..... */
+        if ((insn & 0x0000001f) == 0x00000000) {
+            /* 000010.. ........ 00000010 01000000 */
+            extract_decode_Fmt_0(&u.f_decode0, insn);
+            if (trans_nop(ctx, &u.f_decode0)) return true;
+        }
+        if ((insn & 0x03e00000) == 0x00000000) {
+            /* 00001000 000..... 00000010 010..... */
+            extract_decode_Fmt_1(&u.f_decode1, insn);
+            if (trans_copy(ctx, &u.f_decode1)) return true;
+        }
+    }
+    extract_decode_Fmt_2(&u.f_decode2, insn);
+    if (trans_or(ctx, &u.f_decode2)) return true;
+    return false;
+  }
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index cc5fa1a..2711c6c 100755
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -31,6 +31,7 @@ fields = {}
 arguments = {}
 formats = {}
 patterns = []
+allpatterns = []
 
 translate_prefix = 'trans'
 translate_scope = 'static '
@@ -300,13 +301,7 @@ class General:
         self.fields = flds
 
     def __str__(self):
-        r = self.name
-        if self.base:
-            r = r + ' ' + self.base.name
-        else:
-            r = r + ' ' + str(self.fields)
-        r = r + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
-        return r
+        return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
 
     def str1(self, i):
         return str_indent(i) + self.__str__()
@@ -353,6 +348,47 @@ class Pattern(General):
 # end Pattern
 
 
+class MultiPattern(General):
+    """Class representing an overlapping set of instruction patterns"""
+
+    def __init__(self, lineno, pats, fixb, fixm, udfm):
+        self.file = input_file
+        self.lineno = lineno
+        self.pats = pats
+        self.base = None
+        self.fixedbits = fixb
+        self.fixedmask = fixm
+        self.undefmask = udfm
+
+    def __str__(self):
+        r = "{"
+        for p in self.pats:
+           r = r + ' ' + str(p)
+        return r + "}"
+
+    def output_decl(self):
+        for p in self.pats:
+            p.output_decl()
+
+    def output_code(self, i, extracted, outerbits, outermask):
+        global translate_prefix
+        ind = str_indent(i)
+        for p in self.pats:
+            if outermask != p.fixedmask:
+                innermask = p.fixedmask & ~outermask
+                innerbits = p.fixedbits & ~outermask
+                output(ind, 'if ((insn & ',
+                       '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits),
+                       ') {\n')
+                output(ind, '    /* ',
+                       str_match_bits(p.fixedbits, p.fixedmask), ' */\n')
+                p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
+                output(ind, '}\n')
+            else:
+                p.output_code(i, extracted, p.fixedbits, p.fixedmask)
+#end MultiPattern
+
+
 def parse_field(lineno, name, toks):
     """Parse one instruction field from TOKS at LINENO"""
     global fields
@@ -505,6 +541,7 @@ def parse_generic(lineno, is_format, name, toks):
     global arguments
     global formats
     global patterns
+    global allpatterns
     global re_ident
     global insnwidth
     global insnmask
@@ -649,6 +686,7 @@ def parse_generic(lineno, is_format, name, toks):
         pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
                       undefmask, fieldmask, flds)
         patterns.append(pat)
+        allpatterns.append(pat)
 
     # Validate the masks that we have assembled.
     if fieldmask & fixedmask:
@@ -667,17 +705,66 @@ def parse_generic(lineno, is_format, name, toks):
                           .format(allbits ^ insnmask))
 # end parse_general
 
+def build_multi_pattern(lineno, pats):
+    """Validate the Patterns going into a MultiPattern."""
+    global patterns
+    global insnmask
+
+    if len(pats) < 2:
+        error(lineno, 'less than two patterns within braces')
+
+    fixedmask = insnmask
+    undefmask = insnmask
+
+    # Collect fixed/undefmask for all of the children.
+    # Move the defining lineno back to that of the first child.
+    for p in pats:
+        fixedmask &= p.fixedmask
+        undefmask &= p.undefmask
+        if p.lineno < lineno:
+            lineno = p.lineno
+
+    repeat = True
+    while repeat:
+        if fixedmask == 0:
+            error(lineno, 'no overlap in patterns within braces')
+        fixedbits = None
+        for p in pats:
+            thisbits = p.fixedbits & fixedmask
+            if fixedbits is None:
+                fixedbits = thisbits
+            elif fixedbits != thisbits:
+                fixedmask &= ~(fixedbits ^ thisbits)
+                break
+        else:
+            repeat = False
+
+    mp = MultiPattern(lineno, pats, fixedbits, fixedmask, undefmask)
+    patterns.append(mp)
+# end build_multi_pattern
 
 def parse_file(f):
     """Parse all of the patterns within a file"""
 
+    global patterns
+
     # Read all of the lines of the file.  Concatenate lines
     # ending in backslash; discard empty lines and comments.
     toks = []
     lineno = 0
+    nesting = 0
+    saved_pats = []
+
     for line in f:
         lineno += 1
 
+        # Expand and strip spaces, to find indent.
+        line = line.rstrip()
+        line = line.expandtabs()
+        len1 = len(line)
+        line = line.lstrip()
+        len2 = len(line)
+
         # Discard comments
         end = line.find('#')
         if end >= 0:
@@ -687,10 +774,18 @@ def parse_file(f):
         if len(toks) != 0:
             # Next line after continuation
             toks.extend(t)
-        elif len(t) == 0:
-            # Empty line
-            continue
         else:
+            # Allow completely blank lines.
+            if len1 == 0:
+                continue
+            indent = len1 - len2
+            # Empty line due to comment.
+            if len(t) == 0:
+                # Indentation must be correct, even for comment lines.
+                if indent != nesting:
+                    error(lineno, 'indentation ', indent, ' != ', nesting)
+                continue
+            start_lineno = lineno
             toks = t
 
         # Continuation?
@@ -698,21 +793,47 @@ def parse_file(f):
             toks.pop()
             continue
 
-        if len(toks) < 2:
-            error(lineno, 'short line')
-
         name = toks[0]
         del toks[0]
 
+        # End nesting?
+        if name == '}':
+            if nesting == 0:
+                error(start_lineno, 'mismatched close brace')
+            if len(toks) != 0:
+                error(start_lineno, 'extra tokens after close brace')
+            nesting -= 2
+            if indent != nesting:
+                error(start_lineno, 'indentation ', indent, ' != ', nesting)
+            pats = patterns
+            patterns = saved_pats.pop()
+            build_multi_pattern(lineno, pats)
+            toks = []
+            continue
+
+        # Everything else should have current indentation.
+        if indent != nesting:
+            error(start_lineno, 'indentation ', indent, ' != ', nesting)
+
+        # Start nesting?
+        if name == '{':
+            if len(toks) != 0:
+                error(start_lineno, 'extra tokens after open brace')
+            saved_pats.append(patterns)
+            patterns = []
+            nesting += 2
+            toks = []
+            continue
+
         # Determine the type of object needing to be parsed.
         if name[0] == '%':
-            parse_field(lineno, name[1:], toks)
+            parse_field(start_lineno, name[1:], toks)
         elif name[0] == '&':
-            parse_arguments(lineno, name[1:], toks)
+            parse_arguments(start_lineno, name[1:], toks)
         elif name[0] == '@':
-            parse_generic(lineno, True, name[1:], toks)
+            parse_generic(start_lineno, True, name[1:], toks)
         else:
-            parse_generic(lineno, False, name, toks)
+            parse_generic(start_lineno, False, name, toks)
         toks = []
 # end parse_file
 
@@ -789,11 +910,10 @@ def build_tree(pats, outerbits, outermask):
         innermask &= i.fixedmask
 
     if innermask == 0:
-        pnames = []
+        text = 'overlapping patterns:'
         for p in pats:
-            pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
-        error_with_file(pats[0].file, pats[0].lineno,
-                        'overlapping patterns:', pnames)
+            text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
+        error_with_file(pats[0].file, pats[0].lineno, text)
 
     fullmask = outermask | innermask
 
@@ -846,6 +966,7 @@ def main():
     global arguments
     global formats
     global patterns
+    global allpatterns
     global translate_scope
     global translate_prefix
     global output_fd
@@ -907,7 +1028,7 @@ def main():
     # Make sure that the argument sets are the same, and declare the
     # function only once.
     out_pats = {}
-    for i in patterns:
+    for i in allpatterns:
         if i.name in out_pats:
             p = out_pats[i.name]
             if i.base.base != p.base.base:
diff --git a/tests/decode/err_pattern_group_overlap1.decode b/tests/decode/err_pattern_group_overlap1.decode
new file mode 100644
index 0000000..ebe3030
--- /dev/null
+++ b/tests/decode/err_pattern_group_overlap1.decode
@@ -0,0 +1,6 @@
+one	00000000000000000000000000000000
+{
+  two	0000000000000000000000000000000 s:1
+  three 000000000000000000000000000000 s:1 0
+}
+
-- 
cgit v1.1


From bf92118fa9dda4a425da7f75d43ad7b4df8d5650 Mon Sep 17 00:00:00 2001
From: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Date: Wed, 27 Feb 2019 13:02:17 +0100
Subject: test/decode: Add tests for PatternGroups

This adds one test that supposed to succeed to test deep nesting
of pattern groups which is rarely exercised by targets using decode
tree. The remaining tests exercise various fail conditions.

Signed-off-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Message-Id: <20190227120217.20794-1-kbastian@mail.uni-paderborn.de>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tests/decode/check.sh                        |  6 ++++++
 tests/decode/err_pattern_group_empty.decode  |  6 ++++++
 tests/decode/err_pattern_group_ident1.decode | 10 ++++++++++
 tests/decode/err_pattern_group_ident2.decode | 11 +++++++++++
 tests/decode/err_pattern_group_nest1.decode  | 13 +++++++++++++
 tests/decode/succ_pattern_group_nest1.decode | 22 ++++++++++++++++++++++
 6 files changed, 68 insertions(+)
 create mode 100644 tests/decode/err_pattern_group_empty.decode
 create mode 100644 tests/decode/err_pattern_group_ident1.decode
 create mode 100644 tests/decode/err_pattern_group_ident2.decode
 create mode 100644 tests/decode/err_pattern_group_nest1.decode
 create mode 100644 tests/decode/succ_pattern_group_nest1.decode

diff --git a/tests/decode/check.sh b/tests/decode/check.sh
index 79a06c3..95445a0 100755
--- a/tests/decode/check.sh
+++ b/tests/decode/check.sh
@@ -15,4 +15,10 @@ for i in err_*.decode; do
     fi
 done
 
+for i in succ_*.decode; do
+    if ! $PYTHON $DECODETREE $i > /dev/null 2> /dev/null; then
+        echo FAIL:$i 1>&2
+    fi
+done
+
 exit $E
diff --git a/tests/decode/err_pattern_group_empty.decode b/tests/decode/err_pattern_group_empty.decode
new file mode 100644
index 0000000..abbff6b
--- /dev/null
+++ b/tests/decode/err_pattern_group_empty.decode
@@ -0,0 +1,6 @@
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
+# See the COPYING.LIB file in the top-level directory.
+
+# empty groups are not allowed
+{
+}
diff --git a/tests/decode/err_pattern_group_ident1.decode b/tests/decode/err_pattern_group_ident1.decode
new file mode 100644
index 0000000..3e65fab
--- /dev/null
+++ b/tests/decode/err_pattern_group_ident1.decode
@@ -0,0 +1,10 @@
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
+# See the COPYING.LIB file in the top-level directory.
+
+%sub1 0:8
+
+# Make sure that indentation is enforced
+{
+    top      00000000 00000000 00000000 00000000
+    sub1     00000000 00000000 00000000 ........ %sub1
+}
diff --git a/tests/decode/err_pattern_group_ident2.decode b/tests/decode/err_pattern_group_ident2.decode
new file mode 100644
index 0000000..bc85923
--- /dev/null
+++ b/tests/decode/err_pattern_group_ident2.decode
@@ -0,0 +1,11 @@
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
+# See the COPYING.LIB file in the top-level directory.
+
+%sub1 0:8
+
+# Make sure that indentation is enforced
+{
+  top      00000000 00000000 00000000 00000000
+  sub1     00000000 00000000 00000000 ........ %sub1
+# comments are suposed to be indented
+}
diff --git a/tests/decode/err_pattern_group_nest1.decode b/tests/decode/err_pattern_group_nest1.decode
new file mode 100644
index 0000000..92e971c
--- /dev/null
+++ b/tests/decode/err_pattern_group_nest1.decode
@@ -0,0 +1,13 @@
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
+# See the COPYING.LIB file in the top-level directory.
+
+%sub1 0:8
+%sub2 8:8
+%sub3 16:8
+%sub4 24:8
+
+# Groups with no overlap are supposed to fail
+{
+  top  00000000 00000000 00000000 00000000
+  sub4 ........ ........ ........ ........ %sub1 %sub2 %sub3 %sub4
+}
diff --git a/tests/decode/succ_pattern_group_nest1.decode b/tests/decode/succ_pattern_group_nest1.decode
new file mode 100644
index 0000000..77b0f48
--- /dev/null
+++ b/tests/decode/succ_pattern_group_nest1.decode
@@ -0,0 +1,22 @@
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
+# See the COPYING.LIB file in the top-level directory.
+
+%sub1 0:8
+%sub2 8:8
+%sub3 16:8
+%sub4 24:7
+
+# Make sure deep netsting works, as few targets will actually exercise it
+{
+  top        00000000 00000000 00000000 00000000
+  {
+    sub1     00000000 00000000 00000000 ........ %sub1
+    {
+      sub2   00000000 00000000 ........ ........ %sub1 %sub2
+      {
+        sub3 00000000 ........ ........ ........ %sub1 %sub2 %sub3
+        sub4 0....... ........ ........ ........ %sub1 %sub2 %sub3 %sub4
+      }
+    }
+  }
+}
-- 
cgit v1.1


From cd3e7fc18db43b296f413814cd4b72bcd6878bc4 Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Sat, 23 Feb 2019 17:44:31 -0800
Subject: decodetree: Add --static-decode option

Like --decode, but do not drop 'static' qualifier.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 scripts/decodetree.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index 2711c6c..6067e94 100755
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -979,7 +979,8 @@ def main():
 
     decode_scope = 'static '
 
-    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=']
+    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
+                 'static-decode=']
     try:
         (opts, args) = getopt.getopt(sys.argv[1:], 'o:w:', long_opts)
     except getopt.GetoptError as err:
@@ -990,6 +991,8 @@ def main():
         elif o == '--decode':
             decode_function = a
             decode_scope = ''
+        elif o == '--static-decode':
+            decode_function = a
         elif o == '--translate':
             translate_prefix = a
             translate_scope = ''
-- 
cgit v1.1


From 82bfac1c06cadeb5c7252734dc695d951185916c Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Wed, 27 Feb 2019 21:37:32 -0800
Subject: decodetree: Produce clean output for an empty input file

This is interesting for bisection, where an output file is plumbed,
but does not yet have patterns.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 scripts/decodetree.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index 6067e94..6e7ba27 100755
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -1049,15 +1049,16 @@ def main():
            '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
 
     i4 = str_indent(4)
-    output(i4, 'union {\n')
-    for n in sorted(arguments.keys()):
-        f = arguments[n]
-        output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
-    output(i4, '} u;\n\n')
 
-    t.output_code(4, False, 0, 0)
-    output(i4, 'return false;\n')
+    if len(allpatterns) != 0:
+        output(i4, 'union {\n')
+        for n in sorted(arguments.keys()):
+            f = arguments[n]
+            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
+        output(i4, '} u;\n\n')
+        t.output_code(4, False, 0, 0)
 
+    output(i4, 'return false;\n')
     output('}\n')
 
     if output_file:
-- 
cgit v1.1


From 263ac638a76a72841e3f513b14c515680703e084 Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Thu, 28 Feb 2019 14:36:52 -0800
Subject: decodetree: Allow +- to begin a number initializing a field

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 scripts/decodetree.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index 6e7ba27..f6f58e2 100755
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -589,7 +589,7 @@ def parse_generic(lineno, is_format, name, toks):
             continue
 
         # 'Foo=number' sets an argument field to a constant value
-        if re_fullmatch(re_ident + '=[0-9]+', t):
+        if re_fullmatch(re_ident + '=[+-]?[0-9]+', t):
             (fname, value) = t.split('=')
             value = int(value)
             flds = add_field(lineno, flds, fname, ConstField(value))
-- 
cgit v1.1


From 71ecf79bf40db20237a3cfc01cc407cc4cad8817 Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Thu, 28 Feb 2019 14:45:50 -0800
Subject: decodetree: Prefix extract function names with decode_function

This makes it easier to name Formats within multiple decode files.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 scripts/decodetree.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index f6f58e2..ac158b4 100755
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -312,7 +312,8 @@ class Format(General):
     """Class representing an instruction format"""
 
     def extract_name(self):
-        return 'extract_' + self.name
+        global decode_function
+        return decode_function + '_extract_' + self.name
 
     def output_extract(self):
         output('static void ', self.extract_name(), '(',
-- 
cgit v1.1


From 2decfc95583dc28add69810eaca6ada7b4b44d3a Mon Sep 17 00:00:00 2001
From: Richard Henderson <richard.henderson@linaro.org>
Date: Tue, 5 Mar 2019 15:34:41 -0800
Subject: decodetree: Properly diagnose fields overflowing an insn

Previously this would result in an exception for shifting
the field mask by a negative number.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 scripts/decodetree.py          | 2 ++
 tests/decode/err_width1.decode | 5 +++++
 tests/decode/err_width2.decode | 5 +++++
 tests/decode/err_width3.decode | 5 +++++
 tests/decode/err_width4.decode | 5 +++++
 5 files changed, 22 insertions(+)
 create mode 100644 tests/decode/err_width1.decode
 create mode 100644 tests/decode/err_width2.decode
 create mode 100644 tests/decode/err_width3.decode
 create mode 100644 tests/decode/err_width4.decode

diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index ac158b4..aa790b5 100755
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -622,6 +622,8 @@ def parse_generic(lineno, is_format, name, toks):
                 sign = True
                 flen = flen[1:]
             shift = int(flen, 10)
+            if shift + width > insnwidth:
+                error(lineno, 'field {0} exceeds insnwidth'.format(fname))
             f = Field(sign, insnwidth - width - shift, shift)
             flds = add_field(lineno, flds, fname, f)
             fixedbits <<= shift
diff --git a/tests/decode/err_width1.decode b/tests/decode/err_width1.decode
new file mode 100644
index 0000000..0c14f6d
--- /dev/null
+++ b/tests/decode/err_width1.decode
@@ -0,0 +1,5 @@
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
+# See the COPYING.LIB file in the top-level directory.
+
+# Diagnose too many bits (33 of 32)
+one	000000000000000000000000000000000
diff --git a/tests/decode/err_width2.decode b/tests/decode/err_width2.decode
new file mode 100644
index 0000000..47f0acf
--- /dev/null
+++ b/tests/decode/err_width2.decode
@@ -0,0 +1,5 @@
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
+# See the COPYING.LIB file in the top-level directory.
+
+# Diagnose too few bits (31 of 32)
+one	0000000000000000000000000000000
diff --git a/tests/decode/err_width3.decode b/tests/decode/err_width3.decode
new file mode 100644
index 0000000..c5fb6b3
--- /dev/null
+++ b/tests/decode/err_width3.decode
@@ -0,0 +1,5 @@
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
+# See the COPYING.LIB file in the top-level directory.
+
+# Diagnose too many bits (33 of 32)
+one	0 s:32
diff --git a/tests/decode/err_width4.decode b/tests/decode/err_width4.decode
new file mode 100644
index 0000000..1588a63
--- /dev/null
+++ b/tests/decode/err_width4.decode
@@ -0,0 +1,5 @@
+# This work is licensed under the terms of the GNU LGPL, version 2 or later.
+# See the COPYING.LIB file in the top-level directory.
+
+# Diagnose too few bits (31 of 32)
+one	0 s:30
-- 
cgit v1.1