Revert "gas: have scrubber retain more whitespace"

This reverts commit 6ae8a30d44f016cafb46a75843b5109316eb1996. This fixes PR gas/32073.
author: H.J. Lu <hjl.tools@gmail.com> 2024-08-12 08:43:21 -0700
committer: H.J. Lu <hjl.tools@gmail.com> 2024-08-12 10:42:02 -0700
commit: 87582defe75340e0003d4700568322379e9bc9f6 (patch)
tree: 075b5b18912666f9c0955884de736544a6f5c7cb /gas/app.c
parent: 65d41fb015af0048347a25d0665bb1263eb982b0 (diff)
download: fsf-binutils-gdb-87582defe75340e0003d4700568322379e9bc9f6.zip
fsf-binutils-gdb-87582defe75340e0003d4700568322379e9bc9f6.tar.gz
fsf-binutils-gdb-87582defe75340e0003d4700568322379e9bc9f6.tar.bz2
1 file changed, 121 insertions, 50 deletions
diff --git a/gas/app.c b/gas/app.c
index b88b4c9..41ba416 100644
--- a/gas/app.c
+++ b/gas/app.c
@@ -467,18 +467,16 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
 
   /*State 0: beginning of normal line
 	  1: After first whitespace on line (flush more white)
-	  2: After first non-white (opcode or maybe label when they're followed
-	     by colons) on line (keep 1white)
-	  3: after subsequent white on line (typically into operands)
-	     (flush more white)
+	  2: After first non-white (opcode) on line (keep 1white)
+	  3: after second white on line (into operands) (flush white)
 	  4: after putting out a .linefile, put out digits
 	  5: parsing a string, then go to old-state
 	  6: putting out \ escape in a "d string.
 	  7: no longer used
 	  8: no longer used
-	  9: After seeing non-white in state 3 (keep 1white)
-	 10: no longer used
-	 11: After seeing a non-white character in state 0 (eg a label definition)
+	  9: After seeing symbol char in state 3 (keep 1white after symchar)
+	 10: After seeing whitespace in state 9 (keep white before symchar)
+	 11: After seeing a symbol character in state 0 (eg a label definition)
 	 -1: output string in out_string and go to the state in old_state
 	 12: no longer used
 #ifdef DOUBLEBAR_PARALLEL
@@ -941,11 +939,7 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
 	          && (state < 1 || strchr (tc_comment_chars, ch)))
 	      || IS_NEWLINE (ch)
 	      || IS_LINE_SEPARATOR (ch)
-	      || IS_PARALLEL_SEPARATOR (ch)
-	      /* See comma related comment near the bottom of the function.
-		 Reasoning equally applies to whitespace preceding a comma in
-		 most cases.  */
-	      || (ch == ',' && state > 2 && state != 11))
+	      || IS_PARALLEL_SEPARATOR (ch))
 	    {
 	      if (scrub_m68k_mri)
 		{
@@ -988,7 +982,6 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
 		 character at the beginning of a line.  */
 	      goto recycle;
 	    case 2:
-	    case 9:
 	      state = 3;
 	      if (to + 1 < toend)
 		{
@@ -1012,6 +1005,20 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
 		  break;
 		}
 	      goto recycle;	/* Sp in operands */
+	    case 9:
+	    case 10:
+#ifndef TC_KEEP_OPERAND_SPACES
+	      if (scrub_m68k_mri)
+#endif
+		{
+		  /* In MRI mode, we keep these spaces.  */
+		  state = 3;
+		  UNGET (ch);
+		  PUT (' ');
+		  break;
+		}
+	      state = 10;	/* Sp after symbol char */
+	      goto recycle;
 	    case 11:
 	      if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
 		state = 1;
@@ -1082,17 +1089,27 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
 	    {
 	      if (ch2 != EOF)
 		UNGET (ch2);
-	      if (state == 1)
-		state = 2;
-	      else if (state == 3)
-		state = 9;
+	      if (state == 9 || state == 10)
+		state = 3;
 	      PUT (ch);
 	    }
 	  break;
 
 	case LEX_IS_STRINGQUOTE:
 	  quotechar = ch;
-	  if (state == 3)
+	  if (state == 10)
+	    {
+	      /* Preserve the whitespace in foo "bar".  */
+	      UNGET (ch);
+	      state = 3;
+	      PUT (' ');
+
+	      /* PUT didn't jump out.  We could just break, but we
+		 know what will happen, so optimize a bit.  */
+	      ch = GET ();
+	      old_state = 9;
+	    }
+	  else if (state == 3)
 	    old_state = 9;
 	  else if (state == 0)
 	    old_state = 11; /* Now seeing label definition.  */
@@ -1113,6 +1130,14 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
 	      UNGET (c);
 	    }
 #endif
+	  if (state == 10)
+	    {
+	      /* Preserve the whitespace in foo 'b'.  */
+	      UNGET (ch);
+	      state = 3;
+	      PUT (' ');
+	      break;
+	    }
 	  ch = GET ();
 	  if (ch == EOF)
 	    {
@@ -1147,7 +1172,10 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
 	      PUT (out_buf[0]);
 	      break;
 	    }
-	  old_state = state;
+	  if (state == 9)
+	    old_state = 3;
+	  else
+	    old_state = state;
 	  state = -1;
 	  out_string = out_buf;
 	  PUT (*out_string++);
@@ -1157,10 +1185,10 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
 #ifdef KEEP_WHITE_AROUND_COLON
 	  state = 9;
 #else
-	  if (state == 2 || state == 11)
+	  if (state == 9 || state == 10)
+	    state = 3;
+	  else if (state != 3)
 	    state = 1;
-	  else
-	    state = 9;
 #endif
 	  PUT (ch);
 	  break;
@@ -1285,6 +1313,20 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
 	      break;
 	    }
 
+#ifdef TC_D10V
+	  /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
+	     Trap is the only short insn that has a first operand that is
+	     neither register nor label.
+	     We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
+	     We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
+	     already LEX_IS_LINE_COMMENT_START.  However, it is the
+	     only character in line_comment_chars for d10v, hence we
+	     can recognize it as such.  */
+	  /* An alternative approach would be to reset the state to 1 when
+	     we see '||', '<'- or '->', but that seems to be overkill.  */
+	  if (state == 10)
+	    PUT (' ');
+#endif
 	  /* We have a line comment character which is not at the
 	     start of a line.  If this is also a normal comment
 	     character, fall through.  Otherwise treat it as a default
@@ -1348,6 +1390,17 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
 	  /* Fall through.  */
 
 	case LEX_IS_SYMBOL_COMPONENT:
+	  if (state == 10)
+	    {
+	      /* This is a symbol character following another symbol
+		 character, with whitespace in between.  We skipped
+		 the whitespace earlier, so output it now.  */
+	      UNGET (ch);
+	      state = 3;
+	      PUT (' ');
+	      break;
+	    }
+
 #ifdef TC_Z80
 	  /* "af'" is a symbol containing '\''.  */
 	  if (state == 3 && (ch == 'a' || ch == 'A'))
@@ -1373,16 +1426,7 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
 		}
 	    }
 #endif
-
-	  /* Fall through.  */
-	default:
-	de_fault:
-	  /* Some relatively `normal' character.  */
-	  if (state == 0)
-	    state = 11;	/* Now seeing label definition.  */
-	  else if (state == 1)
-	    state = 2;	/* Ditto.  */
-	  else if (state == 3)
+	  if (state == 3)
 	    state = 9;
 
 	  /* This is a common case.  Quickly copy CH and all the
@@ -1392,10 +1436,6 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
 #if defined TC_ARM && defined OBJ_ELF
 	      && symver_state == NULL
 #endif
-#ifdef TC_Z80
-	      /* See comma related comment below.  */
-	      && ch != ','
-#endif
 	      )
 	    {
 	      char *s;
@@ -1410,12 +1450,6 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
 		  if (type != 0
 		      && type != LEX_IS_SYMBOL_COMPONENT)
 		    break;
-#ifdef TC_Z80
-		  /* Need to split at commas, to be able to enter state 16
-		     when needed.  */
-		  if (ch2 == ',')
-		    break;
-#endif
 		}
 
 	      if (s > from)
@@ -1440,15 +1474,52 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
 		}
 	    }
 
-	  /* As a special case, to limit the delta to previous behavior, e.g.
-	     also affecting listings, go straight to state 3 when seeing a
-	     comma. Commas are special: While they can be used to separate
-	     macro parameters or arguments, they cannot (on their own, i.e.
-	     without quoting) be arguments (or parameter default values).
-	     Hence successive whitespace is not meaningful there.  */
-	  if (ch == ',' && state == 9)
-	    state = 3;
+	  /* Fall through.  */
+	default:
+	de_fault:
+	  /* Some relatively `normal' character.  */
+	  if (state == 0)
+	    {
+	      state = 11;	/* Now seeing label definition.  */
+	    }
+	  else if (state == 1)
+	    {
+	      state = 2;	/* Ditto.  */
+	    }
+	  else if (state == 9)
+	    {
+	      if (!IS_SYMBOL_COMPONENT (ch))
+		state = 3;
+	    }
+	  else if (state == 10)
+	    {
+	      if (ch == '\\')
+		{
+		  /* Special handling for backslash: a backslash may
+		     be the beginning of a formal parameter (of a
+		     macro) following another symbol character, with
+		     whitespace in between.  If that is the case, we
+		     output a space before the parameter.  Strictly
+		     speaking, correct handling depends upon what the
+		     macro parameter expands into; if the parameter
+		     expands into something which does not start with
+		     an operand character, then we don't want to keep
+		     the space.  We don't have enough information to
+		     make the right choice, so here we are making the
+		     choice which is more likely to be correct.  */
+		  if (to + 1 >= toend)
+		    {
+		      /* If we're near the end of the buffer, save the
+		         character for the next time round.  Otherwise
+		         we'll lose our state.  */
+		      UNGET (ch);
+		      goto tofull;
+		    }
+		  *to++ = ' ';
+		}
 
+	      state = 3;
+	    }
 	  PUT (ch);
 	  break;
 	}
author	H.J. Lu <hjl.tools@gmail.com>	2024-08-12 08:43:21 -0700
committer	H.J. Lu <hjl.tools@gmail.com>	2024-08-12 10:42:02 -0700
commit	87582defe75340e0003d4700568322379e9bc9f6 (patch)
tree	075b5b18912666f9c0955884de736544a6f5c7cb /gas/app.c
parent	65d41fb015af0048347a25d0665bb1263eb982b0 (diff)
download	fsf-binutils-gdb-87582defe75340e0003d4700568322379e9bc9f6.zip fsf-binutils-gdb-87582defe75340e0003d4700568322379e9bc9f6.tar.gz fsf-binutils-gdb-87582defe75340e0003d4700568322379e9bc9f6.tar.bz2