From 55fd189205493890a10245a6828b8f282ff518c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sat, 10 Nov 2018 22:13:12 +0100 Subject: MAINTAINERS: Add scripts/decodetree.py to the TCG section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20181110211313.6922-2-f4bug@amsat.org> Signed-off-by: Richard Henderson --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index d326756..3426d33 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -117,6 +117,7 @@ F: cpus.c F: exec.c F: accel/tcg/ F: accel/stubs/tcg-stub.c +F: scripts/decodetree.py F: include/exec/cpu*.h F: include/exec/exec-all.h F: include/exec/helper*.h -- cgit v1.1 From 3fdbf5d6794db253b0a5173204bbbcc8a17bc9d1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 23 Feb 2019 13:00:10 -0800 Subject: decodetree: Move documentation to docs/devel/decodetree.rst One great big block comment isn't the best way to document the syntax of a language. 
Reviewed-by: Bastian Koppelmann Signed-off-by: Richard Henderson --- MAINTAINERS | 1 + docs/devel/decodetree.rst | 156 ++++++++++++++++++++++++++++++++++++++++++++++ docs/devel/index.rst | 2 +- scripts/decodetree.py | 134 +-------------------------------------- 4 files changed, 159 insertions(+), 134 deletions(-) create mode 100644 docs/devel/decodetree.rst diff --git a/MAINTAINERS b/MAINTAINERS index 3426d33..564b8db 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -118,6 +118,7 @@ F: exec.c F: accel/tcg/ F: accel/stubs/tcg-stub.c F: scripts/decodetree.py +F: docs/devel/decodetree.rst F: include/exec/cpu*.h F: include/exec/exec-all.h F: include/exec/helper*.h diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst new file mode 100644 index 0000000..d9be30b --- /dev/null +++ b/docs/devel/decodetree.rst @@ -0,0 +1,156 @@ +======================== +Decodetree Specification +======================== + +A *decodetree* is built from instruction *patterns*. A pattern may +represent a single architectural instruction or a group of same, depending +on what is convenient for further processing. + +Each pattern has both *fixedbits* and *fixedmask*, the combination of which +describes the condition under which the pattern is matched:: + + (insn & fixedmask) == fixedbits + +Each pattern may have *fields*, which are extracted from the insn and +passed along to the translator. Examples of such are registers, +immediates, and sub-opcodes. + +In support of patterns, one may declare *fields*, *argument sets*, and +*formats*, each of which may be re-used to simplify further definitions. + +Fields +====== + +Syntax:: + + field_def := '%' identifier ( unnamed_field )+ ( !function=identifier )? + unnamed_field := number ':' ( 's' ) number + +For *unnamed_field*, the first number is the least-significant bit position +of the field and the second number is the length of the field. If the 's' is +present, the field is considered signed. 
If multiple ``unnamed_fields`` are +present, they are concatenated. In this way one can define disjoint fields. + +If ``!function`` is specified, the concatenated result is passed through the +named function, taking and returning an integral value. + +FIXME: the fields of the structure into which this result will be stored +is restricted to ``int``. Which means that we cannot expand 64-bit items. + +Field examples: + ++---------------------------+---------------------------------------------+ +| Input | Generated code | ++===========================+=============================================+ +| %disp 0:s16 | sextract(i, 0, 16) | ++---------------------------+---------------------------------------------+ +| %imm9 16:6 10:3 | extract(i, 16, 6) << 3 | extract(i, 10, 3) | ++---------------------------+---------------------------------------------+ +| %disp12 0:s1 1:1 2:10 | sextract(i, 0, 1) << 11 | | +| | extract(i, 1, 1) << 10 | | +| | extract(i, 2, 10) | ++---------------------------+---------------------------------------------+ +| %shimm8 5:s8 13:1 | expand_shimm8(sextract(i, 5, 8) << 1 | | +| !function=expand_shimm8 | extract(i, 13, 1)) | ++---------------------------+---------------------------------------------+ + +Argument Sets +============= + +Syntax:: + + args_def := '&' identifier ( args_elt )+ ( !extern )? + args_elt := identifier + +Each *args_elt* defines an argument within the argument set. +Each argument set will be rendered as a C structure "arg_$name" +with each of the fields being one of the member arguments. + +If ``!extern`` is specified, the backing structure is assumed +to have been already declared, typically via a second decoder. + +Argument set examples:: + + &reg3 ra rb rc + &loadstore reg base offset + + +Formats +======= + +Syntax:: + + fmt_def := '@' identifier ( fmt_elt )+ + fmt_elt := fixedbit_elt | field_elt | field_ref | args_ref + fixedbit_elt := [01.-]+ + field_elt := identifier ':' 's'? 
number + field_ref := '%' identifier | identifier '=' '%' identifier + args_ref := '&' identifier + +Defining a format is a handy way to avoid replicating groups of fields +across many instruction patterns. + +A *fixedbit_elt* describes a contiguous sequence of bits that must +be 1, 0, or don't care. The difference between '.' and '-' +is that '.' means that the bit will be covered with a field or a +final 0 or 1 from the pattern, and '-' means that the bit is really +ignored by the cpu and will not be specified. + +A *field_elt* describes a simple field only given a width; the position of +the field is implied by its position with respect to other *fixedbit_elt* +and *field_elt*. + +If any *fixedbit_elt* or *field_elt* appear, then all bits must be defined. +Padding with a *fixedbit_elt* of all '.' is an easy way to accomplish that. + +A *field_ref* incorporates a field by reference. This is the only way to +add a complex field to a format. A field may be renamed in the process +via assignment to another identifier. This is intended to allow the +same argument set be used with disjoint named fields. + +A single *args_ref* may specify an argument set to use for the format. +The set of fields in the format must be a subset of the arguments in +the argument set. If an argument set is not specified, one will be +inferred from the set of fields. + +It is recommended, but not required, that all *field_ref* and *args_ref* +appear at the end of the line, not interleaving with *fixedbit_elf* or +*field_elt*. + +Format examples:: + + @opr ...... ra:5 rb:5 ... 0 ....... rc:5 + @opi ...... ra:5 lit:8 1 ....... rc:5 + +Patterns +======== + +Syntax:: + + pat_def := identifier ( pat_elt )+ + pat_elt := fixedbit_elt | field_elt | field_ref | args_ref | fmt_ref | const_elt + fmt_ref := '@' identifier + const_elt := identifier '=' number + +The *fixedbit_elt* and *field_elt* specifiers are unchanged from formats. 
+A pattern that does not specify a named format will have one inferred +from a referenced argument set (if present) and the set of fields. + +A *const_elt* allows a argument to be set to a constant value. This may +come in handy when fields overlap between patterns and one has to +include the values in the *fixedbit_elt* instead. + +The decoder will call a translator function for each pattern matched. + +Pattern examples:: + + addl_r 010000 ..... ..... .... 0000000 ..... @opr + addl_i 010000 ..... ..... .... 0000000 ..... @opi + +which will, in part, invoke:: + + trans_addl_r(ctx, &arg_opr, insn) + +and:: + + trans_addl_i(ctx, &arg_opi, insn) diff --git a/docs/devel/index.rst b/docs/devel/index.rst index 6b11e49..ebbab63 100644 --- a/docs/devel/index.rst +++ b/docs/devel/index.rst @@ -19,4 +19,4 @@ Contents: migration stable-process testing - + decodetree diff --git a/scripts/decodetree.py b/scripts/decodetree.py index e342d27..33e32ee 100755 --- a/scripts/decodetree.py +++ b/scripts/decodetree.py @@ -17,139 +17,7 @@ # # Generate a decoding tree from a specification file. -# -# The tree is built from instruction "patterns". A pattern may represent -# a single architectural instruction or a group of same, depending on what -# is convenient for further processing. -# -# Each pattern has "fixedbits" & "fixedmask", the combination of which -# describes the condition under which the pattern is matched: -# -# (insn & fixedmask) == fixedbits -# -# Each pattern may have "fields", which are extracted from the insn and -# passed along to the translator. Examples of such are registers, -# immediates, and sub-opcodes. -# -# In support of patterns, one may declare fields, argument sets, and -# formats, each of which may be re-used to simplify further definitions. -# -# *** Field syntax: -# -# field_def := '%' identifier ( unnamed_field )+ ( !function=identifier )? 
-# unnamed_field := number ':' ( 's' ) number -# -# For unnamed_field, the first number is the least-significant bit position of -# the field and the second number is the length of the field. If the 's' is -# present, the field is considered signed. If multiple unnamed_fields are -# present, they are concatenated. In this way one can define disjoint fields. -# -# If !function is specified, the concatenated result is passed through the -# named function, taking and returning an integral value. -# -# FIXME: the fields of the structure into which this result will be stored -# is restricted to "int". Which means that we cannot expand 64-bit items. -# -# Field examples: -# -# %disp 0:s16 -- sextract(i, 0, 16) -# %imm9 16:6 10:3 -- extract(i, 16, 6) << 3 | extract(i, 10, 3) -# %disp12 0:s1 1:1 2:10 -- sextract(i, 0, 1) << 11 -# | extract(i, 1, 1) << 10 -# | extract(i, 2, 10) -# %shimm8 5:s8 13:1 !function=expand_shimm8 -# -- expand_shimm8(sextract(i, 5, 8) << 1 -# | extract(i, 13, 1)) -# -# *** Argument set syntax: -# -# args_def := '&' identifier ( args_elt )+ ( !extern )? -# args_elt := identifier -# -# Each args_elt defines an argument within the argument set. -# Each argument set will be rendered as a C structure "arg_$name" -# with each of the fields being one of the member arguments. -# -# If !extern is specified, the backing structure is assumed to -# have been already declared, typically via a second decoder. -# -# Argument set examples: -# -# &reg3 ra rb rc -# &loadstore reg base offset -# -# *** Format syntax: -# -# fmt_def := '@' identifier ( fmt_elt )+ -# fmt_elt := fixedbit_elt | field_elt | field_ref | args_ref -# fixedbit_elt := [01.-]+ -# field_elt := identifier ':' 's'? number -# field_ref := '%' identifier | identifier '=' '%' identifier -# args_ref := '&' identifier -# -# Defining a format is a handy way to avoid replicating groups of fields -# across many instruction patterns. 
-# -# A fixedbit_elt describes a contiguous sequence of bits that must -# be 1, 0, [.-] for don't care. The difference between '.' and '-' -# is that '.' means that the bit will be covered with a field or a -# final [01] from the pattern, and '-' means that the bit is really -# ignored by the cpu and will not be specified. -# -# A field_elt describes a simple field only given a width; the position of -# the field is implied by its position with respect to other fixedbit_elt -# and field_elt. -# -# If any fixedbit_elt or field_elt appear then all bits must be defined. -# Padding with a fixedbit_elt of all '.' is an easy way to accomplish that. -# -# A field_ref incorporates a field by reference. This is the only way to -# add a complex field to a format. A field may be renamed in the process -# via assignment to another identifier. This is intended to allow the -# same argument set be used with disjoint named fields. -# -# A single args_ref may specify an argument set to use for the format. -# The set of fields in the format must be a subset of the arguments in -# the argument set. If an argument set is not specified, one will be -# inferred from the set of fields. -# -# It is recommended, but not required, that all field_ref and args_ref -# appear at the end of the line, not interleaving with fixedbit_elf or -# field_elt. -# -# Format examples: -# -# @opr ...... ra:5 rb:5 ... 0 ....... rc:5 -# @opi ...... ra:5 lit:8 1 ....... rc:5 -# -# *** Pattern syntax: -# -# pat_def := identifier ( pat_elt )+ -# pat_elt := fixedbit_elt | field_elt | field_ref -# | args_ref | fmt_ref | const_elt -# fmt_ref := '@' identifier -# const_elt := identifier '=' number -# -# The fixedbit_elt and field_elt specifiers are unchanged from formats. -# A pattern that does not specify a named format will have one inferred -# from a referenced argument set (if present) and the set of fields. -# -# A const_elt allows a argument to be set to a constant value. 
This may -# come in handy when fields overlap between patterns and one has to -# include the values in the fixedbit_elt instead. -# -# The decoder will call a translator function for each pattern matched. -# -# Pattern examples: -# -# addl_r 010000 ..... ..... .... 0000000 ..... @opr -# addl_i 010000 ..... ..... .... 0000000 ..... @opi -# -# which will, in part, invoke -# -# trans_addl_r(ctx, &arg_opr, insn) -# and -# trans_addl_i(ctx, &arg_opi, insn) +# See the syntax and semantics in docs/devel/decodetree.rst. # import os -- cgit v1.1 From 5d53b0f5d35248894bf7c223689e95600b88434e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 27 Feb 2019 18:34:38 -0800 Subject: decodetree: Document the usefulness of argument sets Reviewed-by: Bastian Koppelmann Signed-off-by: Richard Henderson --- docs/devel/decodetree.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst index d9be30b..62cb7f6 100644 --- a/docs/devel/decodetree.rst +++ b/docs/devel/decodetree.rst @@ -69,6 +69,13 @@ with each of the fields being one of the member arguments. If ``!extern`` is specified, the backing structure is assumed to have been already declared, typically via a second decoder. +Argument sets are useful when one wants to define helper functions +for the translator functions that can perform operations on a common +set of arguments. This can ensure, for instance, that the ``AND`` +pattern and the ``OR`` pattern put their operands into the same named +structure, so that a common ``gen_logic_insn`` may be able to handle +the operations common between the two. 
+ Argument set examples:: &reg3 ra rb rc -- cgit v1.1 From 9b3186e38f00ae0cba36c096e3654f916699f336 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Sun, 16 Dec 2018 20:07:38 -0800 Subject: decodetree: Ensure build_tree does not include values outside insnmask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reproduced with "scripts/decodetree.py /dev/null". Reviewed-by: Bastian Koppelmann Reviewed-by: Eduardo Habkost Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- scripts/decodetree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/decodetree.py b/scripts/decodetree.py index 33e32ee..e26d825 100755 --- a/scripts/decodetree.py +++ b/scripts/decodetree.py @@ -784,7 +784,7 @@ class Tree: def build_tree(pats, outerbits, outermask): # Find the intersection of all remaining fixedmask. - innermask = ~outermask + innermask = ~outermask & insnmask for i in pats: innermask &= i.fixedmask -- cgit v1.1 From eb6b87fac70dd62e3f1286703db20c012e7a9611 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 23 Feb 2019 08:57:46 -0800 Subject: decodetree: Do not unconditionaly return from Pattern.output_code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a consequence, the 'return false' gets pushed up one level. This will allow us to perform some other action when the translator returns failure. 
Tested-by: Philippe Mathieu-Daudé Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Bastian Koppelmann Signed-off-by: Richard Henderson --- scripts/decodetree.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/decodetree.py b/scripts/decodetree.py index e26d825..cc5fa1a 100755 --- a/scripts/decodetree.py +++ b/scripts/decodetree.py @@ -348,8 +348,8 @@ class Pattern(General): output(ind, self.base.extract_name(), '(&u.f_', arg, ', insn);\n') for n, f in self.fields.items(): output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n') - output(ind, 'return ', translate_prefix, '_', self.name, - '(ctx, &u.f_', arg, ');\n') + output(ind, 'if (', translate_prefix, '_', self.name, + '(ctx, &u.f_', arg, ')) return true;\n') # end Pattern @@ -777,8 +777,8 @@ class Tree: output(ind, ' /* ', str_match_bits(innerbits, innermask), ' */\n') s.output_code(i + 4, extracted, innerbits, innermask) + output(ind, ' return false;\n') output(ind, '}\n') - output(ind, 'return false;\n') # end Tree @@ -932,6 +932,7 @@ def main(): output(i4, '} u;\n\n') t.output_code(4, False, 0, 0) + output(i4, 'return false;\n') output('}\n') -- cgit v1.1 From 0eff2df4a2ce677230119440f7eb057acffad5eb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 23 Feb 2019 11:35:36 -0800 Subject: decodetree: Allow grouping of overlapping patterns Signed-off-by: Richard Henderson --- docs/devel/decodetree.rst | 58 +++++++++ scripts/decodetree.py | 165 +++++++++++++++++++++---- tests/decode/err_pattern_group_overlap1.decode | 6 + 3 files changed, 207 insertions(+), 22 deletions(-) create mode 100644 tests/decode/err_pattern_group_overlap1.decode diff --git a/docs/devel/decodetree.rst b/docs/devel/decodetree.rst index 62cb7f6..44ac621 100644 --- a/docs/devel/decodetree.rst +++ b/docs/devel/decodetree.rst @@ -161,3 +161,61 @@ which will, in part, invoke:: and:: trans_addl_i(ctx, &arg_opi, insn) + +Pattern Groups +============== + +Syntax:: + + group := '{' ( pat_def | group 
)+ '}' + +A *group* begins with a lone open-brace, with all subsequent lines +indented two spaces, and ending with a lone close-brace. Groups +may be nested, increasing the required indentation of the lines +within the nested group to two spaces per nesting level. + +Unlike ungrouped patterns, grouped patterns are allowed to overlap. +Conflicts are resolved by selecting the patterns in order. If all +of the fixedbits for a pattern match, its translate function will +be called. If the translate function returns false, then subsequent +patterns within the group will be matched. + +The following example from PA-RISC shows specialization of the *or* +instruction:: + + { + { + nop 000010 ----- ----- 0000 001001 0 00000 + copy 000010 00000 r1:5 0000 001001 0 rt:5 + } + or 000010 rt2:5 r1:5 cf:4 001001 0 rt:5 + } + +When the *cf* field is zero, the instruction has no side effects, +and may be specialized. When the *rt* field is zero, the output +is discarded and so the instruction has no effect. When the *rt2* +field is zero, the operation is ``reg[rt] | 0`` and so encodes +the canonical register copy operation. + +The output from the generator might look like:: + + switch (insn & 0xfc000fe0) { + case 0x08000240: + /* 000010.. ........ ....0010 010..... */ + if ((insn & 0x0000f000) == 0x00000000) { + /* 000010.. ........ 00000010 010..... */ + if ((insn & 0x0000001f) == 0x00000000) { + /* 000010.. ........ 00000010 01000000 */ + extract_decode_Fmt_0(&u.f_decode0, insn); + if (trans_nop(ctx, &u.f_decode0)) return true; + } + if ((insn & 0x03e00000) == 0x00000000) { + /* 00001000 000..... 00000010 010..... 
*/ + extract_decode_Fmt_1(&u.f_decode1, insn); + if (trans_copy(ctx, &u.f_decode1)) return true; + } + } + extract_decode_Fmt_2(&u.f_decode2, insn); + if (trans_or(ctx, &u.f_decode2)) return true; + return false; + } diff --git a/scripts/decodetree.py b/scripts/decodetree.py index cc5fa1a..2711c6c 100755 --- a/scripts/decodetree.py +++ b/scripts/decodetree.py @@ -31,6 +31,7 @@ fields = {} arguments = {} formats = {} patterns = [] +allpatterns = [] translate_prefix = 'trans' translate_scope = 'static ' @@ -300,13 +301,7 @@ class General: self.fields = flds def __str__(self): - r = self.name - if self.base: - r = r + ' ' + self.base.name - else: - r = r + ' ' + str(self.fields) - r = r + ' ' + str_match_bits(self.fixedbits, self.fixedmask) - return r + return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) def str1(self, i): return str_indent(i) + self.__str__() @@ -353,6 +348,47 @@ class Pattern(General): # end Pattern +class MultiPattern(General): + """Class representing an overlapping set of instruction patterns""" + + def __init__(self, lineno, pats, fixb, fixm, udfm): + self.file = input_file + self.lineno = lineno + self.pats = pats + self.base = None + self.fixedbits = fixb + self.fixedmask = fixm + self.undefmask = udfm + + def __str__(self): + r = "{" + for p in self.pats: + r = r + ' ' + str(p) + return r + "}" + + def output_decl(self): + for p in self.pats: + p.output_decl() + + def output_code(self, i, extracted, outerbits, outermask): + global translate_prefix + ind = str_indent(i) + for p in self.pats: + if outermask != p.fixedmask: + innermask = p.fixedmask & ~outermask + innerbits = p.fixedbits & ~outermask + output(ind, 'if ((insn & ', + '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits), + ') {\n') + output(ind, ' /* ', + str_match_bits(p.fixedbits, p.fixedmask), ' */\n') + p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask) + output(ind, '}\n') + else: + p.output_code(i, extracted, p.fixedbits, p.fixedmask) +#end 
MultiPattern + + def parse_field(lineno, name, toks): """Parse one instruction field from TOKS at LINENO""" global fields @@ -505,6 +541,7 @@ def parse_generic(lineno, is_format, name, toks): global arguments global formats global patterns + global allpatterns global re_ident global insnwidth global insnmask @@ -649,6 +686,7 @@ def parse_generic(lineno, is_format, name, toks): pat = Pattern(name, lineno, fmt, fixedbits, fixedmask, undefmask, fieldmask, flds) patterns.append(pat) + allpatterns.append(pat) # Validate the masks that we have assembled. if fieldmask & fixedmask: @@ -667,17 +705,66 @@ def parse_generic(lineno, is_format, name, toks): .format(allbits ^ insnmask)) # end parse_general +def build_multi_pattern(lineno, pats): + """Validate the Patterns going into a MultiPattern.""" + global patterns + global insnmask + + if len(pats) < 2: + error(lineno, 'less than two patterns within braces') + + fixedmask = insnmask + undefmask = insnmask + + # Collect fixed/undefmask for all of the children. + # Move the defining lineno back to that of the first child. + for p in pats: + fixedmask &= p.fixedmask + undefmask &= p.undefmask + if p.lineno < lineno: + lineno = p.lineno + + repeat = True + while repeat: + if fixedmask == 0: + error(lineno, 'no overlap in patterns within braces') + fixedbits = None + for p in pats: + thisbits = p.fixedbits & fixedmask + if fixedbits is None: + fixedbits = thisbits + elif fixedbits != thisbits: + fixedmask &= ~(fixedbits ^ thisbits) + break + else: + repeat = False + + mp = MultiPattern(lineno, pats, fixedbits, fixedmask, undefmask) + patterns.append(mp) +# end build_multi_pattern def parse_file(f): """Parse all of the patterns within a file""" + global patterns + # Read all of the lines of the file. Concatenate lines # ending in backslash; discard empty lines and comments. toks = [] lineno = 0 + nesting = 0 + saved_pats = [] + for line in f: lineno += 1 + # Expand and strip spaces, to find indent. 
+ line = line.rstrip() + line = line.expandtabs() + len1 = len(line) + line = line.lstrip() + len2 = len(line) + # Discard comments end = line.find('#') if end >= 0: @@ -687,10 +774,18 @@ def parse_file(f): if len(toks) != 0: # Next line after continuation toks.extend(t) - elif len(t) == 0: - # Empty line - continue else: + # Allow completely blank lines. + if len1 == 0: + continue + indent = len1 - len2 + # Empty line due to comment. + if len(t) == 0: + # Indentation must be correct, even for comment lines. + if indent != nesting: + error(lineno, 'indentation ', indent, ' != ', nesting) + continue + start_lineno = lineno toks = t # Continuation? @@ -698,21 +793,47 @@ def parse_file(f): toks.pop() continue - if len(toks) < 2: - error(lineno, 'short line') - name = toks[0] del toks[0] + # End nesting? + if name == '}': + if nesting == 0: + error(start_lineno, 'mismatched close brace') + if len(toks) != 0: + error(start_lineno, 'extra tokens after close brace') + nesting -= 2 + if indent != nesting: + error(start_lineno, 'indentation ', indent, ' != ', nesting) + pats = patterns + patterns = saved_pats.pop() + build_multi_pattern(lineno, pats) + toks = [] + continue + + # Everything else should have current indentation. + if indent != nesting: + error(start_lineno, 'indentation ', indent, ' != ', nesting) + + # Start nesting? + if name == '{': + if len(toks) != 0: + error(start_lineno, 'extra tokens after open brace') + saved_pats.append(patterns) + patterns = [] + nesting += 2 + toks = [] + continue + # Determine the type of object needing to be parsed. 
if name[0] == '%': - parse_field(lineno, name[1:], toks) + parse_field(start_lineno, name[1:], toks) elif name[0] == '&': - parse_arguments(lineno, name[1:], toks) + parse_arguments(start_lineno, name[1:], toks) elif name[0] == '@': - parse_generic(lineno, True, name[1:], toks) + parse_generic(start_lineno, True, name[1:], toks) else: - parse_generic(lineno, False, name, toks) + parse_generic(start_lineno, False, name, toks) toks = [] # end parse_file @@ -789,11 +910,10 @@ def build_tree(pats, outerbits, outermask): innermask &= i.fixedmask if innermask == 0: - pnames = [] + text = 'overlapping patterns:' for p in pats: - pnames.append(p.name + ':' + p.file + ':' + str(p.lineno)) - error_with_file(pats[0].file, pats[0].lineno, - 'overlapping patterns:', pnames) + text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p) + error_with_file(pats[0].file, pats[0].lineno, text) fullmask = outermask | innermask @@ -846,6 +966,7 @@ def main(): global arguments global formats global patterns + global allpatterns global translate_scope global translate_prefix global output_fd @@ -907,7 +1028,7 @@ def main(): # Make sure that the argument sets are the same, and declare the # function only once. 
out_pats = {} - for i in patterns: + for i in allpatterns: if i.name in out_pats: p = out_pats[i.name] if i.base.base != p.base.base: diff --git a/tests/decode/err_pattern_group_overlap1.decode b/tests/decode/err_pattern_group_overlap1.decode new file mode 100644 index 0000000..ebe3030 --- /dev/null +++ b/tests/decode/err_pattern_group_overlap1.decode @@ -0,0 +1,6 @@ +one 00000000000000000000000000000000 +{ + two 0000000000000000000000000000000 s:1 + three 000000000000000000000000000000 s:1 0 +} + -- cgit v1.1 From bf92118fa9dda4a425da7f75d43ad7b4df8d5650 Mon Sep 17 00:00:00 2001 From: Bastian Koppelmann Date: Wed, 27 Feb 2019 13:02:17 +0100 Subject: test/decode: Add tests for PatternGroups This adds one test that supposed to succeed to test deep nesting of pattern groups which is rarely exercised by targets using decode tree. The remaining tests exercise various fail conditions. Signed-off-by: Bastian Koppelmann Message-Id: <20190227120217.20794-1-kbastian@mail.uni-paderborn.de> Signed-off-by: Richard Henderson --- tests/decode/check.sh | 6 ++++++ tests/decode/err_pattern_group_empty.decode | 6 ++++++ tests/decode/err_pattern_group_ident1.decode | 10 ++++++++++ tests/decode/err_pattern_group_ident2.decode | 11 +++++++++++ tests/decode/err_pattern_group_nest1.decode | 13 +++++++++++++ tests/decode/succ_pattern_group_nest1.decode | 22 ++++++++++++++++++++++ 6 files changed, 68 insertions(+) create mode 100644 tests/decode/err_pattern_group_empty.decode create mode 100644 tests/decode/err_pattern_group_ident1.decode create mode 100644 tests/decode/err_pattern_group_ident2.decode create mode 100644 tests/decode/err_pattern_group_nest1.decode create mode 100644 tests/decode/succ_pattern_group_nest1.decode diff --git a/tests/decode/check.sh b/tests/decode/check.sh index 79a06c3..95445a0 100755 --- a/tests/decode/check.sh +++ b/tests/decode/check.sh @@ -15,4 +15,10 @@ for i in err_*.decode; do fi done +for i in succ_*.decode; do + if ! 
$PYTHON $DECODETREE $i > /dev/null 2> /dev/null; then + echo FAIL:$i 1>&2 + fi +done + exit $E diff --git a/tests/decode/err_pattern_group_empty.decode b/tests/decode/err_pattern_group_empty.decode new file mode 100644 index 0000000..abbff6b --- /dev/null +++ b/tests/decode/err_pattern_group_empty.decode @@ -0,0 +1,6 @@ +# This work is licensed under the terms of the GNU LGPL, version 2 or later. +# See the COPYING.LIB file in the top-level directory. + +# empty groups are not allowed +{ +} diff --git a/tests/decode/err_pattern_group_ident1.decode b/tests/decode/err_pattern_group_ident1.decode new file mode 100644 index 0000000..3e65fab --- /dev/null +++ b/tests/decode/err_pattern_group_ident1.decode @@ -0,0 +1,10 @@ +# This work is licensed under the terms of the GNU LGPL, version 2 or later. +# See the COPYING.LIB file in the top-level directory. + +%sub1 0:8 + +# Make sure that indentation is enforced +{ + top 00000000 00000000 00000000 00000000 + sub1 00000000 00000000 00000000 ........ %sub1 +} diff --git a/tests/decode/err_pattern_group_ident2.decode b/tests/decode/err_pattern_group_ident2.decode new file mode 100644 index 0000000..bc85923 --- /dev/null +++ b/tests/decode/err_pattern_group_ident2.decode @@ -0,0 +1,11 @@ +# This work is licensed under the terms of the GNU LGPL, version 2 or later. +# See the COPYING.LIB file in the top-level directory. + +%sub1 0:8 + +# Make sure that indentation is enforced +{ + top 00000000 00000000 00000000 00000000 + sub1 00000000 00000000 00000000 ........ %sub1 +# comments are suposed to be indented +} diff --git a/tests/decode/err_pattern_group_nest1.decode b/tests/decode/err_pattern_group_nest1.decode new file mode 100644 index 0000000..92e971c --- /dev/null +++ b/tests/decode/err_pattern_group_nest1.decode @@ -0,0 +1,13 @@ +# This work is licensed under the terms of the GNU LGPL, version 2 or later. +# See the COPYING.LIB file in the top-level directory. 
+ +%sub1 0:8 +%sub2 8:8 +%sub3 16:8 +%sub4 24:8 + +# Groups with no overlap are supposed to fail +{ + top 00000000 00000000 00000000 00000000 + sub4 ........ ........ ........ ........ %sub1 %sub2 %sub3 %sub4 +} diff --git a/tests/decode/succ_pattern_group_nest1.decode b/tests/decode/succ_pattern_group_nest1.decode new file mode 100644 index 0000000..77b0f48 --- /dev/null +++ b/tests/decode/succ_pattern_group_nest1.decode @@ -0,0 +1,22 @@ +# This work is licensed under the terms of the GNU LGPL, version 2 or later. +# See the COPYING.LIB file in the top-level directory. + +%sub1 0:8 +%sub2 8:8 +%sub3 16:8 +%sub4 24:7 + +# Make sure deep netsting works, as few targets will actually exercise it +{ + top 00000000 00000000 00000000 00000000 + { + sub1 00000000 00000000 00000000 ........ %sub1 + { + sub2 00000000 00000000 ........ ........ %sub1 %sub2 + { + sub3 00000000 ........ ........ ........ %sub1 %sub2 %sub3 + sub4 0....... ........ ........ ........ %sub1 %sub2 %sub3 %sub4 + } + } + } +} -- cgit v1.1 From cd3e7fc18db43b296f413814cd4b72bcd6878bc4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 23 Feb 2019 17:44:31 -0800 Subject: decodetree: Add --static-decode option Like --decode, but do not drop 'static' qualifier. 
Signed-off-by: Richard Henderson --- scripts/decodetree.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/decodetree.py b/scripts/decodetree.py index 2711c6c..6067e94 100755 --- a/scripts/decodetree.py +++ b/scripts/decodetree.py @@ -979,7 +979,8 @@ def main(): decode_scope = 'static ' - long_opts = ['decode=', 'translate=', 'output=', 'insnwidth='] + long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=', + 'static-decode='] try: (opts, args) = getopt.getopt(sys.argv[1:], 'o:w:', long_opts) except getopt.GetoptError as err: @@ -990,6 +991,8 @@ def main(): elif o == '--decode': decode_function = a decode_scope = '' + elif o == '--static-decode': + decode_function = a elif o == '--translate': translate_prefix = a translate_scope = '' -- cgit v1.1 From 82bfac1c06cadeb5c7252734dc695d951185916c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 27 Feb 2019 21:37:32 -0800 Subject: decodetree: Produce clean output for an empty input file This is interesting for bisection, where an output file is plumbed, but does not yet have patterns. 
Signed-off-by: Richard Henderson --- scripts/decodetree.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/scripts/decodetree.py b/scripts/decodetree.py index 6067e94..6e7ba27 100755 --- a/scripts/decodetree.py +++ b/scripts/decodetree.py @@ -1049,15 +1049,16 @@ def main(): '(DisasContext *ctx, ', insntype, ' insn)\n{\n') i4 = str_indent(4) - output(i4, 'union {\n') - for n in sorted(arguments.keys()): - f = arguments[n] - output(i4, i4, f.struct_name(), ' f_', f.name, ';\n') - output(i4, '} u;\n\n') - t.output_code(4, False, 0, 0) - output(i4, 'return false;\n') + if len(allpatterns) != 0: + output(i4, 'union {\n') + for n in sorted(arguments.keys()): + f = arguments[n] + output(i4, i4, f.struct_name(), ' f_', f.name, ';\n') + output(i4, '} u;\n\n') + t.output_code(4, False, 0, 0) + output(i4, 'return false;\n') output('}\n') if output_file: -- cgit v1.1 From 263ac638a76a72841e3f513b14c515680703e084 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 28 Feb 2019 14:36:52 -0800 Subject: decodetree: Allow +- to begin a number initializing a field Signed-off-by: Richard Henderson --- scripts/decodetree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/decodetree.py b/scripts/decodetree.py index 6e7ba27..f6f58e2 100755 --- a/scripts/decodetree.py +++ b/scripts/decodetree.py @@ -589,7 +589,7 @@ def parse_generic(lineno, is_format, name, toks): continue # 'Foo=number' sets an argument field to a constant value - if re_fullmatch(re_ident + '=[0-9]+', t): + if re_fullmatch(re_ident + '=[+-]?[0-9]+', t): (fname, value) = t.split('=') value = int(value) flds = add_field(lineno, flds, fname, ConstField(value)) -- cgit v1.1 From 71ecf79bf40db20237a3cfc01cc407cc4cad8817 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 28 Feb 2019 14:45:50 -0800 Subject: decodetree: Prefix extract function names with decode_function This makes it easier to name Formats within multiple decode files. 
Signed-off-by: Richard Henderson --- scripts/decodetree.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/decodetree.py b/scripts/decodetree.py index f6f58e2..ac158b4 100755 --- a/scripts/decodetree.py +++ b/scripts/decodetree.py @@ -312,7 +312,8 @@ class Format(General): """Class representing an instruction format""" def extract_name(self): - return 'extract_' + self.name + global decode_function + return decode_function + '_extract_' + self.name def output_extract(self): output('static void ', self.extract_name(), '(', -- cgit v1.1 From 2decfc95583dc28add69810eaca6ada7b4b44d3a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 5 Mar 2019 15:34:41 -0800 Subject: decodetree: Properly diagnose fields overflowing an insn Previously this would result in an exception for shifting the field mask by a negative number. Signed-off-by: Richard Henderson --- scripts/decodetree.py | 2 ++ tests/decode/err_width1.decode | 5 +++++ tests/decode/err_width2.decode | 5 +++++ tests/decode/err_width3.decode | 5 +++++ tests/decode/err_width4.decode | 5 +++++ 5 files changed, 22 insertions(+) create mode 100644 tests/decode/err_width1.decode create mode 100644 tests/decode/err_width2.decode create mode 100644 tests/decode/err_width3.decode create mode 100644 tests/decode/err_width4.decode diff --git a/scripts/decodetree.py b/scripts/decodetree.py index ac158b4..aa790b5 100755 --- a/scripts/decodetree.py +++ b/scripts/decodetree.py @@ -622,6 +622,8 @@ def parse_generic(lineno, is_format, name, toks): sign = True flen = flen[1:] shift = int(flen, 10) + if shift + width > insnwidth: + error(lineno, 'field {0} exceeds insnwidth'.format(fname)) f = Field(sign, insnwidth - width - shift, shift) flds = add_field(lineno, flds, fname, f) fixedbits <<= shift diff --git a/tests/decode/err_width1.decode b/tests/decode/err_width1.decode new file mode 100644 index 0000000..0c14f6d --- /dev/null +++ b/tests/decode/err_width1.decode @@ -0,0 +1,5 @@ +# This work is 
licensed under the terms of the GNU LGPL, version 2 or later. +# See the COPYING.LIB file in the top-level directory. + +# Diagnose too many bits (33 of 32) +one 000000000000000000000000000000000 diff --git a/tests/decode/err_width2.decode b/tests/decode/err_width2.decode new file mode 100644 index 0000000..47f0acf --- /dev/null +++ b/tests/decode/err_width2.decode @@ -0,0 +1,5 @@ +# This work is licensed under the terms of the GNU LGPL, version 2 or later. +# See the COPYING.LIB file in the top-level directory. + +# Diagnose too few bits (31 of 32) +one 0000000000000000000000000000000 diff --git a/tests/decode/err_width3.decode b/tests/decode/err_width3.decode new file mode 100644 index 0000000..c5fb6b3 --- /dev/null +++ b/tests/decode/err_width3.decode @@ -0,0 +1,5 @@ +# This work is licensed under the terms of the GNU LGPL, version 2 or later. +# See the COPYING.LIB file in the top-level directory. + +# Diagnose too many bits (33 of 32) +one 0 s:32 diff --git a/tests/decode/err_width4.decode b/tests/decode/err_width4.decode new file mode 100644 index 0000000..1588a63 --- /dev/null +++ b/tests/decode/err_width4.decode @@ -0,0 +1,5 @@ +# This work is licensed under the terms of the GNU LGPL, version 2 or later. +# See the COPYING.LIB file in the top-level directory. + +# Diagnose too few bits (31 of 32) +one 0 s:30 -- cgit v1.1