aboutsummaryrefslogtreecommitdiff
path: root/opcodes/mep-asm.c
diff options
context:
space:
mode:
authorJan Beulich <jbeulich@suse.com>2024-08-09 11:59:31 +0200
committerJan Beulich <jbeulich@suse.com>2024-08-09 11:59:31 +0200
commit6ae8a30d44f016cafb46a75843b5109316eb1996 (patch)
tree3043cc5b62038e700957a8ed3671ec7c7a1dc36f /opcodes/mep-asm.c
parenteb2b444321416f5bb98286647ee9769dfee5995d (diff)
downloadbinutils-6ae8a30d44f016cafb46a75843b5109316eb1996.zip
binutils-6ae8a30d44f016cafb46a75843b5109316eb1996.tar.gz
binutils-6ae8a30d44f016cafb46a75843b5109316eb1996.tar.bz2
gas: have scrubber retain more whitespace
According to the description of the state machine, the expectation appears to be that (leaving aside labels) any insn mnemonic or directive would be followed by a comma-separated list of operands. That may have been true very long ago, but at the latest with the advent of more elaborate macros this isn't the case anymore. Neither macro parameters in macro definitions nor macro arguments in macro invocations are required to be separated by commas. Hence whitespace serves a crucial role there. Plus even without "real" macros issues exist, in e.g. .irp n, ... insn\n\(suffix) operand1, operand2 .endr Whitespace following the closing parenthesis would have been removed (ahead of even processing the .irp), as the "opcode" was deemed to have ended earlier already. Therefore, squash the distinction between "opcode" and operands, i.e. fold state 10 back into state 3. Also drop most of the distinction between "symbol chars" and "relatively normal" ones. Not entirely unexpectedly this results in the need to skip whitespace in a few more places in arch-specific code (and quite likely more changes are needed for insn forms not covered by the testsuite). As a result the D10V special case is no longer necessary. In config/tc-sparc.c also move a comment to be next to the code being commented. In opcodes/cgen-asm.in some further cleanup is done, following the local var adjustments.
Diffstat (limited to 'opcodes/mep-asm.c')
-rw-r--r--opcodes/mep-asm.c52
1 files changed, 34 insertions, 18 deletions
diff --git a/opcodes/mep-asm.c b/opcodes/mep-asm.c
index 6e72ddb..87af40c 100644
--- a/opcodes/mep-asm.c
+++ b/opcodes/mep-asm.c
@@ -1331,6 +1331,7 @@ mep_cgen_build_insn_regex (CGEN_INSN *insn)
char rxbuf[CGEN_MAX_RX_ELEMENTS];
char *rx = rxbuf;
const CGEN_SYNTAX_CHAR_TYPE *syn;
+ char prev_syntax_char = 0;
int reg_err;
syn = CGEN_SYNTAX_STRING (CGEN_OPCODE_SYNTAX (opc));
@@ -1368,6 +1369,15 @@ mep_cgen_build_insn_regex (CGEN_INSN *insn)
{
char c = CGEN_SYNTAX_CHAR (* syn);
+ /* See whitespace related comments in parse_insn_normal(). */
+ if (c != ' ' && prev_syntax_char != ' '
+ && (!ISALNUM (c) || !ISALNUM (prev_syntax_char)))
+ {
+ *rx++ = ' ';
+ *rx++ = '*';
+ }
+ prev_syntax_char = c;
+
switch (c)
{
/* Escape any regex metacharacters in the syntax. */
@@ -1401,6 +1411,7 @@ mep_cgen_build_insn_regex (CGEN_INSN *insn)
/* Replace non-syntax fields with globs. */
*rx++ = '.';
*rx++ = '*';
+ prev_syntax_char = 0;
}
}
@@ -1458,10 +1469,8 @@ parse_insn_normal (CGEN_CPU_DESC cd,
const char *errmsg;
const char *p;
const CGEN_SYNTAX_CHAR_TYPE * syn;
-#ifdef CGEN_MNEMONIC_OPERANDS
- /* FIXME: wip */
- int past_opcode_p;
-#endif
+ char prev_syntax_char = 0;
+ bool past_opcode_p;
/* For now we assume the mnemonic is first (there are no leading operands).
We can parse it without needing to set up operand parsing.
@@ -1477,13 +1486,13 @@ parse_insn_normal (CGEN_CPU_DESC cd,
#ifndef CGEN_MNEMONIC_OPERANDS
if (* str && ! ISSPACE (* str))
return _("unrecognized instruction");
+ past_opcode_p = true;
+#else
+ past_opcode_p = false;
#endif
CGEN_INIT_PARSE (cd);
cgen_init_parse_operand (cd);
-#ifdef CGEN_MNEMONIC_OPERANDS
- past_opcode_p = 0;
-#endif
/* We don't check for (*str != '\0') here because we want to parse
any trailing fake arguments in the syntax string. */
@@ -1497,18 +1506,28 @@ parse_insn_normal (CGEN_CPU_DESC cd,
while (* syn != 0)
{
+ char c = CGEN_SYNTAX_CHAR_P (*syn) ? CGEN_SYNTAX_CHAR (*syn) : 0;
+
+ /* FIXME: Despite this check we may still take inappropriate advantage of
+ the fact that GAS's input scrubber will remove extraneous whitespace.
+ We may also be a little too lax with this now, yet being more strict
+ would require targets to indicate where whitespace is permissible. */
+ if (past_opcode_p && c != ' ' && ISSPACE (*str)
+ /* No whitespace between consecutive alphanumeric syntax elements. */
+ && (!ISALNUM (c) || !ISALNUM (prev_syntax_char)))
+ ++str;
+ prev_syntax_char = c;
+
/* Non operand chars must match exactly. */
- if (CGEN_SYNTAX_CHAR_P (* syn))
+ if (c != 0)
{
/* FIXME: While we allow for non-GAS callers above, we assume the
first char after the mnemonic part is a space. */
- /* FIXME: We also take inappropriate advantage of the fact that
- GAS's input scrubber will remove extraneous blanks. */
- if (TOLOWER (*str) == TOLOWER (CGEN_SYNTAX_CHAR (* syn)))
+ if (TOLOWER (*str) == TOLOWER (c))
{
#ifdef CGEN_MNEMONIC_OPERANDS
- if (CGEN_SYNTAX_CHAR(* syn) == ' ')
- past_opcode_p = 1;
+ if (c == ' ')
+ past_opcode_p = true;
#endif
++ syn;
++ str;
@@ -1520,7 +1539,7 @@ parse_insn_normal (CGEN_CPU_DESC cd,
/* xgettext:c-format */
sprintf (msg, _("syntax error (expected char `%c', found `%c')"),
- CGEN_SYNTAX_CHAR(*syn), *str);
+ c, *str);
return msg;
}
else
@@ -1530,15 +1549,12 @@ parse_insn_normal (CGEN_CPU_DESC cd,
/* xgettext:c-format */
sprintf (msg, _("syntax error (expected char `%c', found end of instruction)"),
- CGEN_SYNTAX_CHAR(*syn));
+ c);
return msg;
}
continue;
}
-#ifdef CGEN_MNEMONIC_OPERANDS
- (void) past_opcode_p;
-#endif
/* We have an operand of some sort. */
errmsg = cd->parse_operand (cd, CGEN_SYNTAX_FIELD (*syn), &str, fields);
if (errmsg)