diff options
author | Nick Clifton <nickc@redhat.com> | 2024-07-10 15:01:39 +0100 |
---|---|---|
committer | Nick Clifton <nickc@redhat.com> | 2024-07-10 15:01:39 +0100 |
commit | 479edf0a6a61159486f14d5e62403f8769cc591d (patch) | |
tree | 13bb1c2eef62121a3796258e52d6ae952467d79c | |
parent | 1ca89940494ae2b60647c8b2ac8fb91035a95b0a (diff) | |
download | gdb-479edf0a6a61159486f14d5e62403f8769cc591d.zip gdb-479edf0a6a61159486f14d5e62403f8769cc591d.tar.gz gdb-479edf0a6a61159486f14d5e62403f8769cc591d.tar.bz2 |
Add support for a .base64 pseudo-op to gas
PR 31964
-rw-r--r-- | gas/NEWS | 3 | ||||
-rw-r--r-- | gas/doc/as.texi | 14 | ||||
-rw-r--r-- | gas/read.c | 323 | ||||
-rw-r--r-- | gas/read.h | 1 | ||||
-rw-r--r-- | gas/testsuite/gas/all/base64-bad.d | 5 | ||||
-rw-r--r-- | gas/testsuite/gas/all/base64-bad.l | 11 | ||||
-rw-r--r-- | gas/testsuite/gas/all/base64-bad.s | 10 | ||||
-rw-r--r-- | gas/testsuite/gas/all/base64.d | 20 | ||||
-rw-r--r-- | gas/testsuite/gas/all/base64.s | 8 | ||||
-rw-r--r-- | gas/testsuite/gas/all/gas.exp | 3 |
10 files changed, 398 insertions, 0 deletions
@@ -1,5 +1,8 @@ -*- text -*- +* Add a .base64 directive to the assembler which allows base64 encoded + binary data to be provided as strings. + * Add support for 'armv9.5-a' for -march in AArch64 GAS. * In x86 Intel syntax undue mnemonic suffixes are now warned about. This is diff --git a/gas/doc/as.texi b/gas/doc/as.texi index 33169a5..02db6cb 100644 --- a/gas/doc/as.texi +++ b/gas/doc/as.texi @@ -4497,6 +4497,7 @@ Some machine configurations provide additional directives. * Asciz:: @code{.asciz "@var{string}"}@dots{} * Attach_to_group:: @code{.attach_to_group @var{name}} * Balign:: @code{.balign [@var{abs-expr}[, @var{abs-expr}]]} +* Base64:: @code{.base64 "@var{string}"[, @dots{}]} @ifset ELF * Bss:: @code{.bss @var{subsection}} @end ifset @@ -4851,6 +4852,19 @@ filled in with the value 0x368d (the exact placement of the bytes depends upon the endianness of the processor). If it skips 1 or 3 bytes, the fill value is undefined. +@node Base64 +@section @code{.base64 "@var{string}"[, @dots{}]} +Allows binary data to be entered into a section encoded as a base64 string. +There is no maximum length to the strings, but they must be a multiple of four +bytes long. If necessary the ends of the strings can be padded with @code{=} +characters. Line breaks, control characters and escaped characters are not +allowed in the strings. The strings must be enclosed between double quote +characters. Multiple strings are allowed, but they must be separated by +commas. + +As an example of how to create a base64 encoded string, see the +@command{base64} program (with its @option{-w0} option to disable line breaks). 
+ @ifset ELF @node Bss @section @code{.bss @var{subsection}} @@ -357,6 +357,7 @@ static const pseudo_typeS potable[] = { {"balign", s_align_bytes, 0}, {"balignw", s_align_bytes, -2}, {"balignl", s_align_bytes, -4}, + {"base64", s_base64, 0}, /* block */ #ifdef HANDLE_BUNDLE {"bundle_align_mode", s_bundle_align_mode, 0}, @@ -5448,6 +5449,328 @@ stringer_append_char (int c, int bitsize) FRAG_APPEND_1_CHAR (c); } +/* Code for handling base64 encoded strings. + Based upon code in sharutils' lib/base64.c source file, written by + Simon Josefsson. Which was partially adapted from GNU MailUtils + (mailbox/filter_trans.c, as of 2004-11-28) and improved by review + from Paul Eggert, Bruno Haible, and Stepan Kasal. */ + +#define B64(_) \ + ( (_) == 'A' ? 0 \ + : (_) == 'B' ? 1 \ + : (_) == 'C' ? 2 \ + : (_) == 'D' ? 3 \ + : (_) == 'E' ? 4 \ + : (_) == 'F' ? 5 \ + : (_) == 'G' ? 6 \ + : (_) == 'H' ? 7 \ + : (_) == 'I' ? 8 \ + : (_) == 'J' ? 9 \ + : (_) == 'K' ? 10 \ + : (_) == 'L' ? 11 \ + : (_) == 'M' ? 12 \ + : (_) == 'N' ? 13 \ + : (_) == 'O' ? 14 \ + : (_) == 'P' ? 15 \ + : (_) == 'Q' ? 16 \ + : (_) == 'R' ? 17 \ + : (_) == 'S' ? 18 \ + : (_) == 'T' ? 19 \ + : (_) == 'U' ? 20 \ + : (_) == 'V' ? 21 \ + : (_) == 'W' ? 22 \ + : (_) == 'X' ? 23 \ + : (_) == 'Y' ? 24 \ + : (_) == 'Z' ? 25 \ + : (_) == 'a' ? 26 \ + : (_) == 'b' ? 27 \ + : (_) == 'c' ? 28 \ + : (_) == 'd' ? 29 \ + : (_) == 'e' ? 30 \ + : (_) == 'f' ? 31 \ + : (_) == 'g' ? 32 \ + : (_) == 'h' ? 33 \ + : (_) == 'i' ? 34 \ + : (_) == 'j' ? 35 \ + : (_) == 'k' ? 36 \ + : (_) == 'l' ? 37 \ + : (_) == 'm' ? 38 \ + : (_) == 'n' ? 39 \ + : (_) == 'o' ? 40 \ + : (_) == 'p' ? 41 \ + : (_) == 'q' ? 42 \ + : (_) == 'r' ? 43 \ + : (_) == 's' ? 44 \ + : (_) == 't' ? 45 \ + : (_) == 'u' ? 46 \ + : (_) == 'v' ? 47 \ + : (_) == 'w' ? 48 \ + : (_) == 'x' ? 49 \ + : (_) == 'y' ? 50 \ + : (_) == 'z' ? 51 \ + : (_) == '0' ? 52 \ + : (_) == '1' ? 53 \ + : (_) == '2' ? 54 \ + : (_) == '3' ? 55 \ + : (_) == '4' ? 56 \ + : (_) == '5' ? 
57 \ + : (_) == '6' ? 58 \ + : (_) == '7' ? 59 \ + : (_) == '8' ? 60 \ + : (_) == '9' ? 61 \ + : (_) == '+' ? 62 \ + : (_) == '/' ? 63 \ + : -1) + +static const signed char b64[0x100] = +{ + B64 (0), B64 (1), B64 (2), B64 (3), + B64 (4), B64 (5), B64 (6), B64 (7), + B64 (8), B64 (9), B64 (10), B64 (11), + B64 (12), B64 (13), B64 (14), B64 (15), + B64 (16), B64 (17), B64 (18), B64 (19), + B64 (20), B64 (21), B64 (22), B64 (23), + B64 (24), B64 (25), B64 (26), B64 (27), + B64 (28), B64 (29), B64 (30), B64 (31), + B64 (32), B64 (33), B64 (34), B64 (35), + B64 (36), B64 (37), B64 (38), B64 (39), + B64 (40), B64 (41), B64 (42), B64 (43), + B64 (44), B64 (45), B64 (46), B64 (47), + B64 (48), B64 (49), B64 (50), B64 (51), + B64 (52), B64 (53), B64 (54), B64 (55), + B64 (56), B64 (57), B64 (58), B64 (59), + B64 (60), B64 (61), B64 (62), B64 (63), + B64 (64), B64 (65), B64 (66), B64 (67), + B64 (68), B64 (69), B64 (70), B64 (71), + B64 (72), B64 (73), B64 (74), B64 (75), + B64 (76), B64 (77), B64 (78), B64 (79), + B64 (80), B64 (81), B64 (82), B64 (83), + B64 (84), B64 (85), B64 (86), B64 (87), + B64 (88), B64 (89), B64 (90), B64 (91), + B64 (92), B64 (93), B64 (94), B64 (95), + B64 (96), B64 (97), B64 (98), B64 (99), + B64 (100), B64 (101), B64 (102), B64 (103), + B64 (104), B64 (105), B64 (106), B64 (107), + B64 (108), B64 (109), B64 (110), B64 (111), + B64 (112), B64 (113), B64 (114), B64 (115), + B64 (116), B64 (117), B64 (118), B64 (119), + B64 (120), B64 (121), B64 (122), B64 (123), + B64 (124), B64 (125), B64 (126), B64 (127), + B64 (128), B64 (129), B64 (130), B64 (131), + B64 (132), B64 (133), B64 (134), B64 (135), + B64 (136), B64 (137), B64 (138), B64 (139), + B64 (140), B64 (141), B64 (142), B64 (143), + B64 (144), B64 (145), B64 (146), B64 (147), + B64 (148), B64 (149), B64 (150), B64 (151), + B64 (152), B64 (153), B64 (154), B64 (155), + B64 (156), B64 (157), B64 (158), B64 (159), + B64 (160), B64 (161), B64 (162), B64 (163), + B64 (164), B64 (165), B64 (166), 
B64 (167), + B64 (168), B64 (169), B64 (170), B64 (171), + B64 (172), B64 (173), B64 (174), B64 (175), + B64 (176), B64 (177), B64 (178), B64 (179), + B64 (180), B64 (181), B64 (182), B64 (183), + B64 (184), B64 (185), B64 (186), B64 (187), + B64 (188), B64 (189), B64 (190), B64 (191), + B64 (192), B64 (193), B64 (194), B64 (195), + B64 (196), B64 (197), B64 (198), B64 (199), + B64 (200), B64 (201), B64 (202), B64 (203), + B64 (204), B64 (205), B64 (206), B64 (207), + B64 (208), B64 (209), B64 (210), B64 (211), + B64 (212), B64 (213), B64 (214), B64 (215), + B64 (216), B64 (217), B64 (218), B64 (219), + B64 (220), B64 (221), B64 (222), B64 (223), + B64 (224), B64 (225), B64 (226), B64 (227), + B64 (228), B64 (229), B64 (230), B64 (231), + B64 (232), B64 (233), B64 (234), B64 (235), + B64 (236), B64 (237), B64 (238), B64 (239), + B64 (240), B64 (241), B64 (242), B64 (243), + B64 (244), B64 (245), B64 (246), B64 (247), + B64 (248), B64 (249), B64 (250), B64 (251), + B64 (252), B64 (253), B64 (254), B64 (255) +}; + +static bool +is_base64_char (unsigned int c) +{ + return (c < 0x100) && (b64[c] != -1); +} + +static void +decode_base64_and_append (unsigned int b[4], int len) +{ + gas_assert (len > 1); + + FRAG_APPEND_1_CHAR ((b64[b[0]] << 2) | (b64[b[1]] >> 4)); + if (len == 2) return; + FRAG_APPEND_1_CHAR (((b64[b[1]] << 4) & 0xf0) | (b64[b[2]] >> 2)); + if (len == 3) return; + FRAG_APPEND_1_CHAR (((b64[b[2]] << 6) & 0xc0) | b64[b[3]]); +} + +/* Accept one or more comma separated, base64 encoded strings. Decode them + and store them at the current point in the current section. The strings + must be enclosed in double quotes. Line breaks, quoted characters and + escaped characters are not allowed. Only the characters "A-Za-z0-9+/" are + accepted inside the string. The string must be a multiple of four + characters in length. If the encoded string does not fit this requirement + it may use one or more '=' characters at the end as padding. 
*/ + +void +s_base64 (int dummy ATTRIBUTE_UNUSED) +{ + unsigned int c; + + /* If we have been switched into the abs_section then we + will not have an obstack onto which we can hang strings. */ + if (now_seg == absolute_section) + { + as_bad (_("base64 strings must be placed into a section")); + ignore_rest_of_line (); + return; + } + + if (is_it_end_of_statement ()) + { + as_bad (_("a string must follow the .base64 pseudo-op")); + return; + } + +#ifdef md_flush_pending_output + md_flush_pending_output (); +#endif + +#ifdef md_cons_align + md_cons_align (1); +#endif + + do + { + SKIP_ALL_WHITESPACE (); + + c = * input_line_pointer ++; + + if (c != '"') + { + as_bad (_("expected double quote enclosed string as argument to .base64 pseudo-op")); + ignore_rest_of_line (); + return; + } + + /* Read a block of four base64 encoded characters. */ + int i; + unsigned int b[4]; + bool seen_equals = false; + + loop: + for (i = 0; i < 4; i++) + { + c = * input_line_pointer ++; + + if (c >= 256 || is_end_of_line [c]) + { + as_bad (_("end of line encountered inside .base64 string")); + ignore_rest_of_line (); + return; + } + + if (c == '"') + { + /* We allow this. But only if there were enough + characters to form a valid base64 encoding. */ + if (i > 1) + { + as_warn (_(".base64 string terminated early")); + -- input_line_pointer; + break; + } + + as_bad (_(".base64 string terminated unexpectedly")); + ignore_rest_of_line (); + return; + } + + if (seen_equals && c != '=') + { + as_bad (_("equals character only allowed at end of .base64 string")); + ignore_rest_of_line (); + return; + } + + if (c == '=') + { + if (i == 0) + { + as_bad (_("the equals character cannot start a block of four base64 encoded bytes")); + ignore_rest_of_line (); + return; + } + else if (i == 1) + { + as_bad (_("the equals character cannot be the second character in a block of four base64 encoded bytes")); + ignore_rest_of_line (); + return; + } + + seen_equals = true; + } + else if (! 
is_base64_char (c)) + { + if (ISPRINT (c)) + as_bad (_("invalid character '%c' found inside .base64 string"), c); + else + as_bad (_("invalid character %#x found inside .base64 string"), c); + ignore_rest_of_line (); + return; + } + + b[i] = c; + } + + if (seen_equals && i == 4) + { + -- i; + if (b[2] == '=') + -- i; + } + + /* We have a block of up to four valid base64 encoded bytes. */ + decode_base64_and_append (b, i); + + /* Check the next character. */ + c = * input_line_pointer ++; + + if (is_base64_char (c)) + { + if (seen_equals) + { + as_bad (_("no base64 characters expected after '=' padding characters")); + ignore_rest_of_line (); + return; + } + + -- input_line_pointer; + goto loop; + } + else if (c != '"') + { + as_bad (_(".base64 string must have a terminating double quote character")); + ignore_rest_of_line (); + return; + } + + SKIP_ALL_WHITESPACE (); + + c = * input_line_pointer ++; + } + while (c == ','); + + -- input_line_pointer; + + demand_empty_rest_of_line (); +} + + /* Worker to do .ascii etc statements. Reads 0 or more ',' separated, double-quoted strings. Caller should have checked need_pass_2 is FALSE because we don't @@ -215,6 +215,7 @@ extern void s_nops (int); extern void s_stab (int what); extern void s_struct (int); extern void s_text (int); +extern void s_base64 (int); extern void stringer (int append_zero); extern void s_xstab (int what); extern void s_rva (int); diff --git a/gas/testsuite/gas/all/base64-bad.d b/gas/testsuite/gas/all/base64-bad.d new file mode 100644 index 0000000..d613b3e --- /dev/null +++ b/gas/testsuite/gas/all/base64-bad.d @@ -0,0 +1,5 @@ +#name: .base64 errors test +#source: base64-bad.s +#error_output: base64-bad.l +# The Z80 backend has its own error message for an unterminated string. 
+#notarget: z80-* diff --git a/gas/testsuite/gas/all/base64-bad.l b/gas/testsuite/gas/all/base64-bad.l new file mode 100644 index 0000000..a860a1d --- /dev/null +++ b/gas/testsuite/gas/all/base64-bad.l @@ -0,0 +1,11 @@ +.*: Assembler messages: +.*: Warning: end of file in string; '"' inserted +.*2: Warning: .base64 string terminated early +.*3: Error: .base64 string terminated unexpectedly +.*4: Error: invalid character ' ' found inside .base64 string +.*5: Error: the equals character cannot start a block of four base64 encoded bytes +.*6: Error: the equals character cannot be the second character in a block of four base64 encoded bytes +.*7: Error: no base64 characters expected after '=' padding characters +.*8: Error: junk at end of line, first unrecognized character is `"' +.*9: Error: expected double quote enclosed string as argument to .base64 pseudo-op +.*10: Error: end of line encountered inside .base64 string diff --git a/gas/testsuite/gas/all/base64-bad.s b/gas/testsuite/gas/all/base64-bad.s new file mode 100644 index 0000000..99c3bc2 --- /dev/null +++ b/gas/testsuite/gas/all/base64-bad.s @@ -0,0 +1,10 @@ + .data + .base64 "1234567" + .base64 "NotInASection" + .base64 "invalid character" + .base64 "====" + .base64 "A===" + .base64 "in==side" + .base64 "miss" "comma" + .base64 no quotes + .base64 "noclosingcomma diff --git a/gas/testsuite/gas/all/base64.d b/gas/testsuite/gas/all/base64.d new file mode 100644 index 0000000..11410a7 --- /dev/null +++ b/gas/testsuite/gas/all/base64.d @@ -0,0 +1,20 @@ +#readelf: -x .data +#name : .base64 test +# Weirdly the binutils/gas test harness does not support a binary +# dumping program like od. The closest we can get is readelf, +# but of course this only works for ELF binaries. Hence the +# restriction below. In theory though the .base64 directive +# will work for all output formats, even if we cannot test it. 
+#target: [is_elf_format] + +Hex dump of section '.data': + 0x00000000 68656c6c 6f20776f 726c640a 70616464 hello world.padd + 0x00000010 696e6720 68657265 0a454c46 02010103 ing here.ELF.... + 0x00000020 00000000 00000000 02003e00 01000000 ..........>..... + 0x00000030 50d34000 00000000 40000000 00000000 P.@.....@....... + 0x00000040 d8204b10 00000000 00000000 40003800 . K.........@.8. + 0x00000050 0e004000 2c002b00 06000000 04000000 ..@.,.+......... + 0x00000060 40000000 00000000 40004000 00000000 @.......@.@..... + 0x00000070 40004000 00000000 10030000 00000000 @.@............. + 0x00000080 10030000 00000000 08000000 00000000 ................ +#pass diff --git a/gas/testsuite/gas/all/base64.s b/gas/testsuite/gas/all/base64.s new file mode 100644 index 0000000..d176467 --- /dev/null +++ b/gas/testsuite/gas/all/base64.s @@ -0,0 +1,8 @@ + .data + .global hello +hello: + .base64 "aGVsbG8gd29ybGQK", "cGFkZGluZyBoZXJlCg==" + + .global very_long +very_long: + .base64 "RUxGAgEBAwAAAAAAAAAAAgA+AAEAAABQ00AAAAAAAEAAAAAAAAAA2CBLEAAAAAAAAAAAQAA4AA4AQAAsACsABgAAAAQAAABAAAAAAAAAAEAAQAAAAAAAQABAAAAAAAAQAwAAAAAAABADAAAAAAAACAAAAAAAAAAAAAAAAAAAAAA=" diff --git a/gas/testsuite/gas/all/gas.exp b/gas/testsuite/gas/all/gas.exp index d262944..32363c5 100644 --- a/gas/testsuite/gas/all/gas.exp +++ b/gas/testsuite/gas/all/gas.exp @@ -468,6 +468,9 @@ gas_test_error "weakref2.s" "" "e: would close weakref loop: e => a => b => c => gas_test_error "weakref3.s" "" "a: would close weakref loop: a => b => c => d => e => a" gas_test_error "weakref4.s" "" "is already defined" +run_dump_test base64 +run_dump_test base64-bad + run_dump_test string if [is_elf_format] { run_dump_test none |