aboutsummaryrefslogtreecommitdiff
path: root/gas/app.c
diff options
context:
space:
mode:
Diffstat (limited to 'gas/app.c')
-rw-r--r--gas/app.c273
1 files changed, 169 insertions, 104 deletions
diff --git a/gas/app.c b/gas/app.c
index 041941a..8dc69ff 100644
--- a/gas/app.c
+++ b/gas/app.c
@@ -45,6 +45,8 @@ static int scrub_m68k_mri;
/* The pseudo-op which switches in and out of MRI mode. See the
comment in do_scrub_chars. */
static const char mri_pseudo[] = ".mri 0";
+static const char *mri_state;
+static char mri_last_ch;
#else
#define scrub_m68k_mri 0
#endif
@@ -56,11 +58,16 @@ static const char symver_pseudo[] = ".symver";
static const char * symver_state;
#endif
-static char last_char;
+/* The pseudo-op (without leading dot) at which we want to (perhaps just
+ temporarily) stop processing. See the comments in do_scrub_chars(). */
+static const char end_pseudo[] = "end ";
+static const char * end_state;
-static char lex[256];
-static const char symbol_chars[] =
-"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+/* Whether, considering the state at start of assembly, NO_PSEUDO_DOT is
+ active. */
+static bool no_pseudo_dot;
+
+static char last_char;
#define LEX_IS_SYMBOL_COMPONENT 1
#define LEX_IS_WHITESPACE 2
@@ -75,9 +82,6 @@ static const char symbol_chars[] =
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST 12
#endif
-#ifdef TC_M32R
-#define DOUBLEBAR_PARALLEL
-#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST 13
#endif
@@ -91,25 +95,86 @@ static const char symbol_chars[] =
#define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
+#define IS_TWOCHAR_COMMENT_1ST(c) (lex[c] == LEX_IS_TWOCHAR_COMMENT_1ST)
#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
-static int process_escape (int);
-
-/* FIXME-soon: The entire lexer/parser thingy should be
- built statically at compile time rather than dynamically
- each and every time the assembler is run. xoxorich. */
+static char lex[256] = {
+ [' '] = LEX_IS_WHITESPACE,
+ ['\t'] = LEX_IS_WHITESPACE,
+#ifdef CR_EOL
+ ['\r'] = LEX_IS_LINE_SEPARATOR,
+#else
+ ['\r'] = LEX_IS_WHITESPACE,
+#endif
+ ['\n'] = LEX_IS_NEWLINE,
+ [':'] = LEX_IS_COLON,
+ ['$'] = LEX_IS_SYMBOL_COMPONENT,
+ ['.'] = LEX_IS_SYMBOL_COMPONENT,
+ ['_'] = LEX_IS_SYMBOL_COMPONENT,
+ ['A'] = LEX_IS_SYMBOL_COMPONENT, ['a'] = LEX_IS_SYMBOL_COMPONENT,
+ ['B'] = LEX_IS_SYMBOL_COMPONENT, ['b'] = LEX_IS_SYMBOL_COMPONENT,
+ ['C'] = LEX_IS_SYMBOL_COMPONENT, ['c'] = LEX_IS_SYMBOL_COMPONENT,
+ ['D'] = LEX_IS_SYMBOL_COMPONENT, ['d'] = LEX_IS_SYMBOL_COMPONENT,
+ ['E'] = LEX_IS_SYMBOL_COMPONENT, ['e'] = LEX_IS_SYMBOL_COMPONENT,
+ ['F'] = LEX_IS_SYMBOL_COMPONENT, ['f'] = LEX_IS_SYMBOL_COMPONENT,
+ ['G'] = LEX_IS_SYMBOL_COMPONENT, ['g'] = LEX_IS_SYMBOL_COMPONENT,
+ ['H'] = LEX_IS_SYMBOL_COMPONENT, ['h'] = LEX_IS_SYMBOL_COMPONENT,
+ ['I'] = LEX_IS_SYMBOL_COMPONENT, ['i'] = LEX_IS_SYMBOL_COMPONENT,
+ ['J'] = LEX_IS_SYMBOL_COMPONENT, ['j'] = LEX_IS_SYMBOL_COMPONENT,
+ ['K'] = LEX_IS_SYMBOL_COMPONENT, ['k'] = LEX_IS_SYMBOL_COMPONENT,
+ ['L'] = LEX_IS_SYMBOL_COMPONENT, ['l'] = LEX_IS_SYMBOL_COMPONENT,
+ ['M'] = LEX_IS_SYMBOL_COMPONENT, ['m'] = LEX_IS_SYMBOL_COMPONENT,
+ ['N'] = LEX_IS_SYMBOL_COMPONENT, ['n'] = LEX_IS_SYMBOL_COMPONENT,
+ ['O'] = LEX_IS_SYMBOL_COMPONENT, ['o'] = LEX_IS_SYMBOL_COMPONENT,
+ ['P'] = LEX_IS_SYMBOL_COMPONENT, ['p'] = LEX_IS_SYMBOL_COMPONENT,
+ ['Q'] = LEX_IS_SYMBOL_COMPONENT, ['q'] = LEX_IS_SYMBOL_COMPONENT,
+ ['R'] = LEX_IS_SYMBOL_COMPONENT, ['r'] = LEX_IS_SYMBOL_COMPONENT,
+ ['S'] = LEX_IS_SYMBOL_COMPONENT, ['s'] = LEX_IS_SYMBOL_COMPONENT,
+ ['T'] = LEX_IS_SYMBOL_COMPONENT, ['t'] = LEX_IS_SYMBOL_COMPONENT,
+ ['U'] = LEX_IS_SYMBOL_COMPONENT, ['u'] = LEX_IS_SYMBOL_COMPONENT,
+ ['V'] = LEX_IS_SYMBOL_COMPONENT, ['v'] = LEX_IS_SYMBOL_COMPONENT,
+ ['W'] = LEX_IS_SYMBOL_COMPONENT, ['w'] = LEX_IS_SYMBOL_COMPONENT,
+ ['X'] = LEX_IS_SYMBOL_COMPONENT, ['x'] = LEX_IS_SYMBOL_COMPONENT,
+ ['Y'] = LEX_IS_SYMBOL_COMPONENT, ['y'] = LEX_IS_SYMBOL_COMPONENT,
+ ['Z'] = LEX_IS_SYMBOL_COMPONENT, ['z'] = LEX_IS_SYMBOL_COMPONENT,
+ ['0'] = LEX_IS_SYMBOL_COMPONENT,
+ ['1'] = LEX_IS_SYMBOL_COMPONENT,
+ ['2'] = LEX_IS_SYMBOL_COMPONENT,
+ ['3'] = LEX_IS_SYMBOL_COMPONENT,
+ ['4'] = LEX_IS_SYMBOL_COMPONENT,
+ ['5'] = LEX_IS_SYMBOL_COMPONENT,
+ ['6'] = LEX_IS_SYMBOL_COMPONENT,
+ ['7'] = LEX_IS_SYMBOL_COMPONENT,
+ ['8'] = LEX_IS_SYMBOL_COMPONENT,
+ ['9'] = LEX_IS_SYMBOL_COMPONENT,
+#define INIT2(n) [n] = LEX_IS_SYMBOL_COMPONENT, \
+ [(n) + 1] = LEX_IS_SYMBOL_COMPONENT
+#define INIT4(n) INIT2 (n), INIT2 ((n) + 2)
+#define INIT8(n) INIT4 (n), INIT4 ((n) + 4)
+#define INIT16(n) INIT8 (n), INIT8 ((n) + 8)
+#define INIT32(n) INIT16 (n), INIT16 ((n) + 16)
+#define INIT64(n) INIT32 (n), INIT32 ((n) + 32)
+#define INIT128(n) INIT64 (n), INIT64 ((n) + 64)
+ INIT128 (128),
+#undef INIT128
+#undef INIT64
+#undef INIT32
+#undef INIT16
+#undef INIT8
+#undef INIT4
+#undef INIT2
+};
void
do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
{
const char *p;
- int c;
- lex[' '] = LEX_IS_WHITESPACE;
- lex['\t'] = LEX_IS_WHITESPACE;
- lex['\r'] = LEX_IS_WHITESPACE;
- lex['\n'] = LEX_IS_NEWLINE;
- lex[':'] = LEX_IS_COLON;
+ /* Latch this once at start. xtensa uses a hook function, yet context isn't
+ meaningful for scrubbing (or else we'd need to sync scrubber behavior as
+ state changes). */
+ if (lex['/'] == 0)
+ no_pseudo_dot = NO_PSEUDO_DOT;
#ifdef TC_M68K
scrub_m68k_mri = m68k_mri;
@@ -133,11 +198,6 @@ do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
/* Note that these override the previous defaults, e.g. if ';' is a
comment char, then it isn't a line separator. */
- for (p = symbol_chars; *p; ++p)
- lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
-
- for (c = 128; c < 256; ++c)
- lex[c] = LEX_IS_SYMBOL_COMPONENT;
#ifdef tc_symbol_chars
/* This macro permits the processor to specify all characters which
@@ -156,6 +216,9 @@ do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
for (p = tc_comment_chars; *p; p++)
lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
+ /* While counter intuitive to have more special purpose line comment chars
+ override more general purpose ordinary ones, logic in do_scrub_chars()
+ depends on this ordering. */
for (p = line_comment_chars; *p; p++)
lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
@@ -172,7 +235,8 @@ do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
#endif
- /* Only allow slash-star comments if slash is not in use.
+ /* Only allow slash-star comments if slash is not in use. Certain
+ other cases are dealt with in LEX_IS_LINE_COMMENT_START handling.
FIXME: This isn't right. We should always permit them. */
if (lex['/'] == 0)
lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
@@ -218,8 +282,6 @@ static int add_newlines;
static char *saved_input;
static size_t saved_input_len;
static char input_buffer[32 * 1024];
-static const char *mri_state;
-static char mri_last_ch;
/* Data structure for saving the state of app across #include's. Note that
app is called asynchronously to the parsing of the .include's, so our
@@ -235,11 +297,12 @@ struct app_save
int add_newlines;
char * saved_input;
size_t saved_input_len;
+ const char * end_state;
#ifdef TC_M68K
int scrub_m68k_mri;
-#endif
const char * mri_state;
char mri_last_ch;
+#endif
#if defined TC_ARM && defined OBJ_ELF
const char * symver_state;
#endif
@@ -265,11 +328,12 @@ app_push (void)
memcpy (saved->saved_input, saved_input, saved_input_len);
saved->saved_input_len = saved_input_len;
}
+ saved->end_state = end_state;
#ifdef TC_M68K
saved->scrub_m68k_mri = scrub_m68k_mri;
-#endif
saved->mri_state = mri_state;
saved->mri_last_ch = mri_last_ch;
+#endif
#if defined TC_ARM && defined OBJ_ELF
saved->symver_state = symver_state;
#endif
@@ -305,11 +369,12 @@ app_pop (char *arg)
saved_input_len = saved->saved_input_len;
free (saved->saved_input);
}
+ end_state = saved->end_state;
#ifdef TC_M68K
scrub_m68k_mri = saved->scrub_m68k_mri;
-#endif
mri_state = saved->mri_state;
mri_last_ch = saved->mri_last_ch;
+#endif
#if defined TC_ARM && defined OBJ_ELF
symver_state = saved->symver_state;
#endif
@@ -406,7 +471,8 @@ scan_for_multibyte_characters (const unsigned char * start,
This is the way the old code used to work. */
size_t
-do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
+do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen,
+ bool check_multibyte)
{
char *to = tostart;
char *toend = tostart + tolen;
@@ -430,11 +496,7 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
10: After seeing whitespace in state 9 (keep white before symchar)
11: After seeing a symbol character in state 0 (eg a label definition)
-1: output string in out_string and go to the state in old_state
- -2: flush text until a '*' '/' is seen, then go to state old_state
-#ifdef TC_V850
- 12: After seeing a dash, looking for a second dash as a start
- of comment.
-#endif
+ 12: no longer used
#ifdef DOUBLEBAR_PARALLEL
13: After seeing a vertical bar, looking for a second
vertical bar as a parallel expression separator.
@@ -513,7 +575,7 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
from = input_buffer;
fromend = from + fromlen;
- if (multibyte_handling == multibyte_warn)
+ if (check_multibyte)
(void) scan_for_multibyte_characters ((const unsigned char *) from,
(const unsigned char* ) fromend,
true /* Generate warnings. */);
@@ -536,43 +598,6 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
PUT (ch);
continue;
- case -2:
- for (;;)
- {
- do
- {
- ch = GET ();
-
- if (ch == EOF)
- {
- as_warn (_("end of file in comment"));
- goto fromeof;
- }
-
- if (ch == '\n')
- PUT ('\n');
- }
- while (ch != '*');
-
- while ((ch = GET ()) == '*')
- ;
-
- if (ch == EOF)
- {
- as_warn (_("end of file in comment"));
- goto fromeof;
- }
-
- if (ch == '/')
- break;
-
- UNGET (ch);
- }
-
- state = old_state;
- UNGET (' ');
- continue;
-
case 4:
ch = GET ();
if (ch == EOF)
@@ -730,16 +755,6 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
line from just after the first white space. */
state = 1;
PUT ('|');
-#ifdef TC_TIC6X
- /* "||^" is used for SPMASKed instructions. */
- ch = GET ();
- if (ch == EOF)
- goto fromeof;
- else if (ch == '^')
- PUT ('^');
- else
- UNGET (ch);
-#endif
continue;
#endif
#ifdef TC_Z80
@@ -803,12 +818,51 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
recycle:
+ /* We need to watch out for .end directives: We should in particular not
+ issue diagnostics for anything after an active one. */
+ if (end_state == NULL)
+ {
+ if ((state == 0 || state == 1)
+ && (ch == '.'
+ || (no_pseudo_dot && ch == end_pseudo[0])))
+ end_state = end_pseudo + (ch != '.');
+ }
+ else if (ch != '\0'
+ && (*end_state == ch
+ /* Avoid triggering on directives like .endif or .endr. */
+ || (*end_state == ' ' && !IS_SYMBOL_COMPONENT (ch))))
+ {
+ if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
+ goto end_end;
+ ++end_state;
+ }
+ else if (*end_state != '\0')
+ /* We did not get the expected character, or we didn't
+ get a valid terminating character after seeing the
+ entire pseudo-op, so we must go back to the beginning. */
+ end_state = NULL;
+ else if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
+ {
+ end_end:
+ /* We've read the entire pseudo-op. If this is the end of the line,
+ bail out now by (ab)using the output-full path. This allows the
+ caller to process input up to here and terminate processing if this
+ directive is actually active (not on the false branch of a
+ conditional and not in a macro definition). */
+ end_state = NULL;
+ state = 0;
+ PUT (ch);
+ goto tofull;
+ }
+
#if defined TC_ARM && defined OBJ_ELF
/* We need to watch out for .symver directives. See the comment later
in this function. */
if (symver_state == NULL)
{
- if ((state == 0 || state == 1) && ch == symver_pseudo[0])
+ if ((state == 0 || state == 1)
+ && strchr (tc_comment_chars, '@') != NULL
+ && ch == symver_pseudo[0])
symver_state = symver_pseudo + 1;
}
else
@@ -826,7 +880,7 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
{
/* We've read the entire pseudo-op. If this is the end
of the line, go back to the beginning. */
- if (IS_NEWLINE (ch))
+ if (IS_NEWLINE (ch) || IS_LINE_SEPARATOR (ch))
symver_state = NULL;
}
}
@@ -853,7 +907,7 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
if (ch != '\0'
&& (*mri_state == ch
|| (*mri_state == ' '
- && lex[ch] == LEX_IS_WHITESPACE)
+ && IS_WHITESPACE (ch))
|| (*mri_state == '0'
&& ch == '1')))
{
@@ -861,8 +915,9 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
++mri_state;
}
else if (*mri_state != '\0'
- || (lex[ch] != LEX_IS_WHITESPACE
- && lex[ch] != LEX_IS_NEWLINE))
+ || (!IS_WHITESPACE (ch)
+ && !IS_LINE_SEPARATOR (ch)
+ && !IS_NEWLINE (ch)))
{
/* We did not get the expected character, or we didn't
get a valid terminating character after seeing the
@@ -934,7 +989,12 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
}
}
#endif
+
+ /* Prune trailing whitespace. */
if (IS_COMMENT (ch)
+ || (IS_LINE_COMMENT (ch)
+ && (state < 1 || strchr (tc_comment_chars, ch)))
+ || IS_NEWLINE (ch)
|| IS_LINE_SEPARATOR (ch)
|| IS_PARALLEL_SEPARATOR (ch))
{
@@ -947,6 +1007,16 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
}
goto recycle;
}
+#ifdef DOUBLESLASH_LINE_COMMENTS
+ if (IS_TWOCHAR_COMMENT_1ST (ch))
+ {
+ ch2 = GET ();
+ if (ch2 != EOF)
+ UNGET (ch2);
+ if (ch2 == '/')
+ goto recycle;
+ }
+#endif
/* If we're in state 2 or 11, we've seen a non-white
character followed by whitespace. If the next character
@@ -1029,6 +1099,7 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
ch2 = GET ();
if (ch2 == '*')
{
+ twochar_comment:
for (;;)
{
do
@@ -1097,6 +1168,8 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
}
else if (state == 3)
old_state = 9;
+ else if (state == 0)
+ old_state = 11; /* Now seeing label definition. */
else
old_state = state;
state = 5;
@@ -1243,15 +1316,9 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
{
ch2 = GET ();
if (ch2 == '*')
- {
- old_state = 3;
- state = -2;
- break;
- }
- else if (ch2 != EOF)
- {
- UNGET (ch2);
- }
+ goto twochar_comment;
+ if (ch2 != EOF)
+ UNGET (ch2);
}
if (state == 0 || state == 1) /* Only comment at start of line. */
@@ -1321,14 +1388,10 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
start of a line. If this is also a normal comment
character, fall through. Otherwise treat it as a default
character. */
- if (strchr (tc_comment_chars, ch) == NULL
- && (! scrub_m68k_mri
- || (ch != '!' && ch != '*')))
+ if (strchr (tc_comment_chars, ch) == NULL)
goto de_fault;
if (scrub_m68k_mri
- && (ch == '!' || ch == '*' || ch == '#')
- && state != 1
- && state != 10)
+ && (ch == '!' || ch == '*' || ch == '#'))
goto de_fault;
/* Fall through. */
case LEX_IS_COMMENT_START:
@@ -1426,11 +1489,13 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
/* This is a common case. Quickly copy CH and all the
following symbol component or normal characters. */
if (to + 1 < toend
+#ifdef TC_M68K
&& mri_state == NULL
+#endif
#if defined TC_ARM && defined OBJ_ELF
&& symver_state == NULL
#endif
- )
+ && end_state == NULL)
{
char *s;
ptrdiff_t len;