diff options
-rw-r--r-- | ld/ChangeLog | 19 | ||||
-rw-r--r-- | ld/NEWS | 8 | ||||
-rw-r--r-- | ld/ld.texi | 53 | ||||
-rw-r--r-- | ld/ldgram.y | 10 | ||||
-rw-r--r-- | ld/ldlang.c | 69 | ||||
-rw-r--r-- | ld/ldlang.h | 3 | ||||
-rw-r--r-- | ld/ldlex.l | 1 | ||||
-rw-r--r-- | ld/testsuite/ld-scripts/ascii.d | 25 | ||||
-rw-r--r-- | ld/testsuite/ld-scripts/ascii.s | 11 | ||||
-rw-r--r-- | ld/testsuite/ld-scripts/ascii.t | 38 | ||||
-rw-r--r-- | ld/testsuite/ld-scripts/asciz.d | 19 | ||||
-rw-r--r-- | ld/testsuite/ld-scripts/asciz.t | 23 | ||||
-rw-r--r-- | ld/testsuite/ld-scripts/script.exp | 1 |
13 files changed, 221 insertions, 59 deletions
diff --git a/ld/ChangeLog b/ld/ChangeLog index ac2c913..0fa00a9 100644 --- a/ld/ChangeLog +++ b/ld/ChangeLog @@ -1,3 +1,22 @@ +2023-02-16 Ulf Samuelsson <binutils@emagii.com> + Nick Clifton <nickc@redhat.com> + + * ldlex.l: Add ASCII token. + * ldgram.y: Add parsing of the ASCII command. + * ldlang.c (lang_add_string): Add maximum size parameter. Move + escape character handling code into separate function. + * ldlang.h (lang_add_string): Update prototype. + * NEWS: Mention the new feature. + * ld.texi (Output Section Data): Document the new directives. + * testsuite/ld-scripts/asciz.t: Adjust to work on more architectures + and to test more aspects of the ASCIZ directive. + * testsuite/ld-scripts/asciz.d: Adjust to match the changes to the + test linker script. + * testsuite/ld-scripts/ascii.d: New test driver. + * testsuite/ld-scripts/ascii.s: New test assembler source. + * testsuite/ld-scripts/ascii.t: New test script. + * testsuite/ld-scripts/script.exp: Run the new test. + 2023-02-15 Nick Clifton <nickc@redhat.com> PR 30078 @@ -1,6 +1,12 @@ -*- text -*- -* The linker script syntax has a new command for output sections: ASCIZ "string" +* The linker script syntax has two new commands for inserting text into output + sections: + ASCII (<size>) "string" + This will reserve a zero filled block of <size> bytes at the current + location and insert "string" at the beginning of the block. If the string + is too long, it will be truncated. + ASCIZ "string" This will insert a zero-terminated string at the current location. Changes in 2.40: @@ -5308,7 +5308,6 @@ C identifiers because they contain a @samp{.} character. 
@cindex data @cindex section data @cindex output section data -@kindex ASCIZ ``@var{string}'' @kindex BYTE(@var{expression}) @kindex SHORT(@var{expression}) @kindex LONG(@var{expression}) @@ -5345,18 +5344,6 @@ When the object file format does not have an explicit endianness, as is true of, for example, S-records, the value will be stored in the endianness of the first input object file. -You can include a zero-terminated string in an output section by using -@code{ASCIZ}. The keyword is followed by a string which is stored at -the current value of the location counter adding a zero byte at the -end. If the string includes spaces it must be enclosed in double -quotes. The string may contain '\n', '\r', '\t' and octal numbers. -Hex numbers are not supported. - -For example, this string of 16 characters will create a 17 byte area -@smallexample - ASCIZ "This is 16 bytes" -@end smallexample - Note---these commands only work inside a section description and not between them, so the following will produce an error from the linker: @smallexample @@ -5367,6 +5354,46 @@ whereas this will work: SECTIONS @{@ .text : @{@ *(.text) ; LONG(1) @}@ .data : @{@ *(.data) @}@ @}@ @end smallexample +@cindex output section strings +@kindex ASCII (@var{expression}) ``@var{string}'' +@kindex ASCIZ ``@var{string}'' +You can include a zero-terminated string in an output section by using +@code{ASCIZ}. The keyword is followed by a string which is stored at +the current value of the location counter, with a zero byte added +at the end. Any length of string is supported by this directive. + +You can include a fixed size string in an output section by using +@code{ASCII}. The keyword is followed by a size enclosed in +parentheses and then a string. The string is stored at the current +value of the location counter and zero bytes are added at the end to +fill up to the specified size. Note the fill value is ignored for +this padding. 
+ +If the string is too long, a warning is issued and the string is +truncated. The string will still be zero-terminated in this case. + +If the expression evaluates to zero then the directive will be treated +as if it were @code{ASCIZ} instead. + +If the string in an @code{ASCII} or @code{ASCIZ} command includes spaces +it must be enclosed in double quotes. + +The string can have C escape characters like '\n', '\r', '\t' and +octal numbers. The '\"' escape is not supported. Nor are escaped hex +values. + +Example 1: This is a string of 16 characters and will create a 32 byte +area: +@smallexample + ASCII (32) "This is 16 bytes" +@end smallexample + +Example 2: This is a string of 16 characters and will create a 17 byte +area: +@smallexample + ASCIZ "This is 16 bytes" +@end smallexample + @kindex FILL(@var{expression}) @cindex holes, filling @cindex unspecified memory diff --git a/ld/ldgram.y b/ld/ldgram.y index 8240cf9..faffeec 100644 --- a/ld/ldgram.y +++ b/ld/ldgram.y @@ -125,7 +125,7 @@ static int error_index; %right UNARY %token END %left <token> '(' -%token <token> ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE ASCIZ +%token <token> ALIGN_K BLOCK BIND QUAD SQUAD LONG SHORT BYTE ASCII ASCIZ %token SECTIONS PHDRS INSERT_K AFTER BEFORE %token DATA_SEGMENT_ALIGN DATA_SEGMENT_RELRO_END DATA_SEGMENT_END %token SORT_BY_NAME SORT_BY_ALIGNMENT SORT_NONE @@ -668,9 +668,15 @@ statement: { lang_add_data ((int) $1, $3); } + | ASCII '(' mustbe_exp ')' NAME + { + /* 'value' is a memory leak, do we care? */ + etree_type *value = $3; + lang_add_string (value->value.value, $5); + } | ASCIZ NAME { - lang_add_string ($2); + lang_add_string (0, $2); } | FILL '(' fill_exp ')' { diff --git a/ld/ldlang.c b/ld/ldlang.c index b20455c..2852a42 100644 --- a/ld/ldlang.c +++ b/ld/ldlang.c @@ -8361,15 +8361,20 @@ lang_add_data (int type, union etree_union *exp) new_stmt->type = type; } -void -lang_add_string (const char *s) +/* Convert escape codes in S. + Supports \n, \r, \t and \NNN octals. 
+ Returns a copy of S in a malloc'ed buffer. */ + +static char * +convert_string (const char * s) { - bfd_vma len = strlen (s); - bfd_vma i; - bool escape = false; + size_t len = strlen (s); + size_t i; + bool escape = false; + char * buffer = malloc (len + 1); + char * b; - /* Add byte expressions until end of string. */ - for (i = 0 ; i < len; i++) + for (i = 0, b = buffer; i < len; i++) { char c = *s++; @@ -8404,7 +8409,7 @@ lang_add_string (const char *s) value += (c - '0'); i++; s++; - + c = *s; if ((c >= '0') && (c <= '7')) { @@ -8422,26 +8427,58 @@ lang_add_string (const char *s) i--; s--; } - + c = value; } break; } - - lang_add_data (BYTE, exp_intop (c)); escape = false; } else { if (c == '\\') - escape = true; - else - lang_add_data (BYTE, exp_intop (c)); + { + escape = true; + continue; + } } + + * b ++ = c; + } + + * b = 0; + return buffer; +} + +void +lang_add_string (size_t size, const char *s) +{ + size_t len; + size_t i; + char * string; + + string = convert_string (s); + len = strlen (string); + + /* Check if it is ASCIZ command (len == 0) */ + if (size == 0) + /* Make sure that we include the terminating nul byte. */ + size = len + 1; + else if (len >= size) + { + len = size - 1; + + einfo (_("%P:%pS: warning: ASCII string does not fit in allocated space," + " truncated\n"), NULL); } - /* Remeber to terminate the string. */ - lang_add_data (BYTE, exp_intop (0)); + for (i = 0 ; i < len ; i++) + lang_add_data (BYTE, exp_intop (string[i])); + + while (i++ < size) + lang_add_data (BYTE, exp_intop ('\0')); + + free (string); } /* Create a new reloc statement. 
RELOC is the BFD relocation type to diff --git a/ld/ldlang.h b/ld/ldlang.h index 3281906..2300fa5 100644 --- a/ld/ldlang.h +++ b/ld/ldlang.h @@ -646,8 +646,9 @@ extern void pop_stat_ptr (void); extern void lang_add_data (int, union etree_union *); +extern bfd_vma charcount(const char *s); extern void lang_add_string - (const char *); + (size_t, const char *s); extern void lang_add_reloc (bfd_reloc_code_real_type, reloc_howto_type *, asection *, const char *, union etree_union *); @@ -309,6 +309,7 @@ V_IDENTIFIER [*?.$_a-zA-Z\[\]\-\!\^\\]([*?.$_a-zA-Z0-9\[\]\-\!\^\\]|::)* <WILD>"LONG" { RTOKEN(LONG); } <WILD>"SHORT" { RTOKEN(SHORT); } <WILD>"BYTE" { RTOKEN(BYTE); } +<WILD>"ASCII" { RTOKEN(ASCII); } <WILD>"ASCIZ" { RTOKEN(ASCIZ); } <SCRIPT>"NOFLOAT" { RTOKEN(NOFLOAT); } <SCRIPT,EXPRESSION>"NOCROSSREFS" { RTOKEN(NOCROSSREFS); } diff --git a/ld/testsuite/ld-scripts/ascii.d b/ld/testsuite/ld-scripts/ascii.d new file mode 100644 index 0000000..cfc1a4c --- /dev/null +++ b/ld/testsuite/ld-scripts/ascii.d @@ -0,0 +1,25 @@ +#source: ascii.s +#ld: -T ascii.t +#objdump: -s -j .header +#notarget: [is_aout_format] +#skip: tic4x-*-* tic54x-*-* *-*-*ecoff *-*-macho *-*-aix* + +.*: file format .* + +Contents of section .header: + .... 70726f67 72616d20 6e616d65 00000000 program name.... + .... 656d7074 79000000 00000000 00000000 empty........... + .... 00000000 00000000 00000000 00000000 ................ + .... 00000000 00000000 00000000 00000000 ................ + .... 00000000 00000000 00000000 00000000 ................ + .... 636f6d6d 656e7420 310a0000 00000000 comment 1....... + .... 00000000 00000000 00000000 00000000 ................ + .... 636f6d6d 656e7420 320a0000 00000000 comment 2....... + .... 00000000 00000000 00000000 00000000 ................ + .... 636f6d6d 656e7420 330a0000 00000000 comment 3....... + .... 00000000 00000000 00000000 00000000 ................ + .... 636f6d6d 656e7420 340a0000 00000000 comment 4....... + .... 
00000000 00000000 49206d65 616e7420 ........I meant + .... 746f2073 61793a20 54686973 20697320 to say: This is + .... 77617920 746f6f20 6c6f6e67 00000000 way too long.... +#pass diff --git a/ld/testsuite/ld-scripts/ascii.s b/ld/testsuite/ld-scripts/ascii.s new file mode 100644 index 0000000..a1b6148 --- /dev/null +++ b/ld/testsuite/ld-scripts/ascii.s @@ -0,0 +1,11 @@ + .extern ecc_start + .section .text +main: + .long 0x45444F43 + .long 0x12345678 + + .section .data + .long 0x9abcdef0 + + .section .bss + .long 0 diff --git a/ld/testsuite/ld-scripts/ascii.t b/ld/testsuite/ld-scripts/ascii.t new file mode 100644 index 0000000..6f682fa --- /dev/null +++ b/ld/testsuite/ld-scripts/ascii.t @@ -0,0 +1,38 @@ +_start = 0x000000; + +SECTIONS +{ + . = 0x1000 + SIZEOF_HEADERS; + + .header ALIGN (0x100) (READONLY) : + { + ASCII (16) "program name" + ASCII (64) "empty" + ASCII (4 * 8) "comment 1\n" + ASCII (32) "comment 2\n" + ASCII (32) "comment 3\n" + ASCII (24) "comment 4\n" + ASCII (64) "I meant to say: This is way too long" + } + + .text ALIGN (0x100) : + { + entry = .; + *(.text) + } + + .data : AT (0x400000) + { + *(.data) + } + + . = ALIGN(0x20); + + .bss : + { + *(.bss) + } + + /DISCARD/ : { *(*) } +} + diff --git a/ld/testsuite/ld-scripts/asciz.d b/ld/testsuite/ld-scripts/asciz.d index 615cf99..75e3c85 100644 --- a/ld/testsuite/ld-scripts/asciz.d +++ b/ld/testsuite/ld-scripts/asciz.d @@ -1,17 +1,14 @@ #source: asciz.s #ld: -T asciz.t -#objdump: -s -j .text -#target: [is_elf_format] -#skip: mips*-*-* -#skip: tilegx*-*-* tilepro-*-* -# COFF, PE and MIPS targets align code to a 16 byte boundary -# tilegx andtilepro aligns code to a 8 byte boundary. +#objdump: -s -j .data +#notarget: [is_aout_format] +#skip: tic4x-*-* tic54x-*-* *-*-*ecoff *-*-macho *-*-aix* .*: file format .* -Contents of section .text: - .... 01010101 54686973 20697320 61207374 ....This is a st - .... 72696e67 00...... ........ ........ ring............ - .... 
54686973 20697320 616e6f74 68657220 This is another - .... 0a737472 696e6753 00 .stringS........ +Contents of section .data: + .... 54686973 20697320 61207374 72696e67 This is a string + .... 00546869 73206973 20616e6f 74686572 .This is another + .... 0a537472 696e6700 006e6f71 756f7465 .String..noquote + .... 7300 s. #pass diff --git a/ld/testsuite/ld-scripts/asciz.t b/ld/testsuite/ld-scripts/asciz.t index ab66f9a..3aeb7d0 100644 --- a/ld/testsuite/ld-scripts/asciz.t +++ b/ld/testsuite/ld-scripts/asciz.t @@ -1,23 +1,16 @@ -MEMORY { - rom : ORIGIN = 0x00000, LENGTH = 0x10000 - ram : ORIGIN = 0x10000, LENGTH = 0x10000 -} _start = 0x000000; SECTIONS { . = 0x1000 + SIZEOF_HEADERS; - .text ALIGN (0x20) : - { - *(.text) + + .data : AT (0x10000) + { ASCIZ "This is a string" - . = ALIGN(0x20); - align_label = .; - ASCIZ "This is another \nstring\123" - unalign_label = .; - } - .data : AT (0x10000) { *(.data) } >ram /* NO default AT>rom */ - . = ALIGN(0x20); - .bss : { *(.bss) } >ram /* NO default AT>rom */ + ASCIZ "This is another\n\123tring" + ASCIZ "" + ASCIZ noquotes + } + /DISCARD/ : { *(*) } } diff --git a/ld/testsuite/ld-scripts/script.exp b/ld/testsuite/ld-scripts/script.exp index a574dde..56e12da 100644 --- a/ld/testsuite/ld-scripts/script.exp +++ b/ld/testsuite/ld-scripts/script.exp @@ -228,6 +228,7 @@ foreach test_script $test_script_list { } run_dump_test "asciz" +run_dump_test "ascii" run_dump_test "align-with-input" run_dump_test "pr20302" run_dump_test "output-section-types" |