aboutsummaryrefslogtreecommitdiff
path: root/ld
diff options
context:
space:
mode:
author Alan Modra <amodra@gmail.com> 2021-02-01 09:15:41 +1030
committer Alan Modra <amodra@gmail.com> 2021-02-02 01:27:12 +1030
commit 40726f16a8d7105761e36398054860a923d4efc9 (patch)
tree 06d77510d97baeed95b4e7f56c0c20ed5cc8bdad /ld
parent 82a1fd3a4935fe665cf08bc6820942c4a091184c (diff)
download gdb-40726f16a8d7105761e36398054860a923d4efc9.zip
gdb-40726f16a8d7105761e36398054860a923d4efc9.tar.gz
gdb-40726f16a8d7105761e36398054860a923d4efc9.tar.bz2
ld script expression parsing
Parsing symbol or file/section names in ld linker scripts is a little complicated. Inside SECTIONS, a name might be the start of an expression or an output section. Is ".foo=x-y" a fancy section name or is it the expression ".foo = x - y"? It isn't possible for a single lookahead parser to decide, so the answer in this case is that it's a section name. This is the reason why everyone writes linker script assignment expressions with lots of white-space.

However, there are many places where the parser knows for sure that an expression is expected. Those could be written without whitespace given the first change to ldlex.l below. Unfortunately, that runs into a lookahead problem. Optional expressions at the end of an output section statement require the parser to look ahead one token in expression context. For this example from standard scripts

  .interp : { *(.interp) }
  .note.gnu.build-id : { *(.note.gnu.build-id) }

at the end of the .interp closing brace, the parser is looking for a possible memspec, phdr, fill or even an optional comma. The next token is a NAME, but in expression context that NAME now doesn't include '-' as a valid char. So the lookahead NAME is ".note.gnu.build" with an unexpected "-id" syntax error before the colon. The rest of the patch involving ldlex_backup arranges to discard that NAME token so that it will be rescanned in the proper script context.

  * ldgram.y (section): Call ldlex_backup. Remove empty action.
  * ldlex.h (ldlex_backup): Declare.
  * ldlex.l (<EXPRESSION>NAME): Don't use NOCFILENAMECHAR set of chars, use SYMBOLNAMECHAR.
  (ldlex_backup): New function.
Diffstat (limited to 'ld')
-rw-r--r-- ld/ChangeLog 8
-rw-r--r-- ld/ldgram.y 11
-rw-r--r-- ld/ldlex.h 1
-rw-r--r-- ld/ldlex.l 12
4 files changed, 30 insertions, 2 deletions
diff --git a/ld/ChangeLog b/ld/ChangeLog
index 5b6ead7..c5f73f8 100644
--- a/ld/ChangeLog
+++ b/ld/ChangeLog
@@ -1,5 +1,13 @@
2021-02-01 Alan Modra <amodra@gmail.com>
+ * ldgram.y (section): Call ldlex_backup. Remove empty action.
+ * ldlex.h (ldlex_backup): Declare.
+ * ldlex.l (<EXPRESSION>NAME): Don't use NOCFILENAMECHAR set of
+ chars, use SYMBOLNAMECHAR.
+ (ldlex_backup): New function.
+
+2021-02-01 Alan Modra <amodra@gmail.com>
+
* ldgram.y: Whitespace fixes.
2021-02-01 Alan Modra <amodra@gmail.com>
diff --git a/ld/ldgram.y b/ld/ldgram.y
index b0a4619..08dc110 100644
--- a/ld/ldgram.y
+++ b/ld/ldgram.y
@@ -1071,11 +1071,15 @@ section: NAME { ldlex_expression(); }
'}' { ldlex_popstate (); ldlex_expression (); }
memspec_opt memspec_at_opt phdr_opt fill_opt
{
+ if (yychar == NAME)
+ {
+ yyclearin;
+ ldlex_backup ();
+ }
ldlex_popstate ();
lang_leave_output_section_statement ($18, $15, $17, $16);
}
opt_comma
- {}
| OVERLAY
{ ldlex_expression (); }
opt_exp_without_type opt_nocrossrefs opt_at opt_subalign
@@ -1089,6 +1093,11 @@ section: NAME { ldlex_expression(); }
{ ldlex_popstate (); ldlex_expression (); }
memspec_opt memspec_at_opt phdr_opt fill_opt
{
+ if (yychar == NAME)
+ {
+ yyclearin;
+ ldlex_backup ();
+ }
ldlex_popstate ();
lang_leave_overlay ($5, (int) $4,
$16, $13, $15, $14);
diff --git a/ld/ldlex.h b/ld/ldlex.h
index b010102..d9b36ea 100644
--- a/ld/ldlex.h
+++ b/ld/ldlex.h
@@ -191,6 +191,7 @@ extern void ldlex_defsym (void);
extern void ldlex_expression (void);
extern void ldlex_both (void);
extern void ldlex_popstate (void);
+extern void ldlex_backup (void);
extern const char* ldlex_filename (void);
/* In lexsup.c. */
diff --git a/ld/ldlex.l b/ld/ldlex.l
index 237892c..7652e8d 100644
--- a/ld/ldlex.l
+++ b/ld/ldlex.l
@@ -385,7 +385,7 @@ V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)*
yylval.name = xstrdup (yytext + 2);
return LNAME;
}
-<EXPRESSION>{SYMBOLNAMECHAR1}{NOCFILENAMECHAR}* {
+<EXPRESSION>{SYMBOLNAMECHAR1}{SYMBOLNAMECHAR}* {
yylval.name = xstrdup (yytext);
return NAME;
}
@@ -636,6 +636,16 @@ ldlex_popstate (void)
yy_start = *(--state_stack_p);
}
+/* In cases where the parser needs to look ahead and the context
+ changes from expression to script or vice-versa, throw away a
+ NAME. What constitutes a NAME depends on context. */
+
+void
+ldlex_backup (void)
+{
+ yyless (0);
+}
+
/* Return the current file name, or the previous file if no file is
current. */