aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Clifton <nickc@redhat.com>2024-07-10 15:01:39 +0100
committerNick Clifton <nickc@redhat.com>2024-07-10 15:01:39 +0100
commit479edf0a6a61159486f14d5e62403f8769cc591d (patch)
tree13bb1c2eef62121a3796258e52d6ae952467d79c
parent1ca89940494ae2b60647c8b2ac8fb91035a95b0a (diff)
downloadgdb-479edf0a6a61159486f14d5e62403f8769cc591d.zip
gdb-479edf0a6a61159486f14d5e62403f8769cc591d.tar.gz
gdb-479edf0a6a61159486f14d5e62403f8769cc591d.tar.bz2
Add support for a .base64 pseudo-op to gas
PR 31964
-rw-r--r--gas/NEWS3
-rw-r--r--gas/doc/as.texi14
-rw-r--r--gas/read.c323
-rw-r--r--gas/read.h1
-rw-r--r--gas/testsuite/gas/all/base64-bad.d5
-rw-r--r--gas/testsuite/gas/all/base64-bad.l11
-rw-r--r--gas/testsuite/gas/all/base64-bad.s10
-rw-r--r--gas/testsuite/gas/all/base64.d20
-rw-r--r--gas/testsuite/gas/all/base64.s8
-rw-r--r--gas/testsuite/gas/all/gas.exp3
10 files changed, 398 insertions, 0 deletions
diff --git a/gas/NEWS b/gas/NEWS
index d0eb0f7..be14ec6 100644
--- a/gas/NEWS
+++ b/gas/NEWS
@@ -1,5 +1,8 @@
-*- text -*-
+* Add a .base64 directive to the assembler which allows base64 encoded
+ binary data to be provided as strings.
+
* Add support for 'armv9.5-a' for -march in AArch64 GAS.
* In x86 Intel syntax undue mnemonic suffixes are now warned about. This is
diff --git a/gas/doc/as.texi b/gas/doc/as.texi
index 33169a5..02db6cb 100644
--- a/gas/doc/as.texi
+++ b/gas/doc/as.texi
@@ -4497,6 +4497,7 @@ Some machine configurations provide additional directives.
* Asciz:: @code{.asciz "@var{string}"}@dots{}
* Attach_to_group:: @code{.attach_to_group @var{name}}
* Balign:: @code{.balign [@var{abs-expr}[, @var{abs-expr}]]}
+* Base64:: @code{.base64 "@var{string}"[, @dots{}]}
@ifset ELF
* Bss:: @code{.bss @var{subsection}}
@end ifset
@@ -4851,6 +4852,19 @@ filled in with the value 0x368d (the exact placement of the bytes depends upon
the endianness of the processor). If it skips 1 or 3 bytes, the fill value is
undefined.
+@node Base64
+@section @code{.base64 "@var{string}"[, @dots{}]}
+Allows binary data to be entered into a section encoded as a base64 string.
+There is no maximum length to the strings, but they must be a multiple of four
+bytes long. If necessary the ends of the strings can be padded with @code{=}
+characters. Line breaks, control characters and escaped characters are not
+allowed in the strings. The strings must be enclosed between double quote
+characters. Multiple strings are allowed, but they must be separated by
+commas.
+
+As an example of how to create a base64 encoded string, see the
+@command{base64} program (with its @option{-w0} option to disable line breaks).
+
@ifset ELF
@node Bss
@section @code{.bss @var{subsection}}
diff --git a/gas/read.c b/gas/read.c
index 5b411e9..e6bbb89 100644
--- a/gas/read.c
+++ b/gas/read.c
@@ -357,6 +357,7 @@ static const pseudo_typeS potable[] = {
{"balign", s_align_bytes, 0},
{"balignw", s_align_bytes, -2},
{"balignl", s_align_bytes, -4},
+ {"base64", s_base64, 0},
/* block */
#ifdef HANDLE_BUNDLE
{"bundle_align_mode", s_bundle_align_mode, 0},
@@ -5448,6 +5449,328 @@ stringer_append_char (int c, int bitsize)
FRAG_APPEND_1_CHAR (c);
}
+/* Code for handling base64 encoded strings.
+ Based upon code in sharutils' lib/base64.c source file, written by
+ Simon Josefsson, which was partially adapted from GNU MailUtils
+ (mailbox/filter_trans.c, as of 2004-11-28) and improved by review
+ from Paul Eggert, Bruno Haible, and Stepan Kasal.
+
+ The B64 macro maps a character code to its 6-bit base64 value: 'A'-'Z'
+ give 0-25, 'a'-'z' give 26-51, '0'-'9' give 52-61, '+' gives 62 and '/'
+ gives 63. Any other code, including the '=' padding character, gives -1.
+
+ The b64 table below is B64 expanded for every code from 0 to 255, so
+ that a character can be classified or decoded with a single array
+ lookup. Entry N of the initializer is B64 (N), because the table is
+ indexed directly by character code; keep the entries in that order. */
+
+#define B64(_) \
+ ( (_) == 'A' ? 0 \
+ : (_) == 'B' ? 1 \
+ : (_) == 'C' ? 2 \
+ : (_) == 'D' ? 3 \
+ : (_) == 'E' ? 4 \
+ : (_) == 'F' ? 5 \
+ : (_) == 'G' ? 6 \
+ : (_) == 'H' ? 7 \
+ : (_) == 'I' ? 8 \
+ : (_) == 'J' ? 9 \
+ : (_) == 'K' ? 10 \
+ : (_) == 'L' ? 11 \
+ : (_) == 'M' ? 12 \
+ : (_) == 'N' ? 13 \
+ : (_) == 'O' ? 14 \
+ : (_) == 'P' ? 15 \
+ : (_) == 'Q' ? 16 \
+ : (_) == 'R' ? 17 \
+ : (_) == 'S' ? 18 \
+ : (_) == 'T' ? 19 \
+ : (_) == 'U' ? 20 \
+ : (_) == 'V' ? 21 \
+ : (_) == 'W' ? 22 \
+ : (_) == 'X' ? 23 \
+ : (_) == 'Y' ? 24 \
+ : (_) == 'Z' ? 25 \
+ : (_) == 'a' ? 26 \
+ : (_) == 'b' ? 27 \
+ : (_) == 'c' ? 28 \
+ : (_) == 'd' ? 29 \
+ : (_) == 'e' ? 30 \
+ : (_) == 'f' ? 31 \
+ : (_) == 'g' ? 32 \
+ : (_) == 'h' ? 33 \
+ : (_) == 'i' ? 34 \
+ : (_) == 'j' ? 35 \
+ : (_) == 'k' ? 36 \
+ : (_) == 'l' ? 37 \
+ : (_) == 'm' ? 38 \
+ : (_) == 'n' ? 39 \
+ : (_) == 'o' ? 40 \
+ : (_) == 'p' ? 41 \
+ : (_) == 'q' ? 42 \
+ : (_) == 'r' ? 43 \
+ : (_) == 's' ? 44 \
+ : (_) == 't' ? 45 \
+ : (_) == 'u' ? 46 \
+ : (_) == 'v' ? 47 \
+ : (_) == 'w' ? 48 \
+ : (_) == 'x' ? 49 \
+ : (_) == 'y' ? 50 \
+ : (_) == 'z' ? 51 \
+ : (_) == '0' ? 52 \
+ : (_) == '1' ? 53 \
+ : (_) == '2' ? 54 \
+ : (_) == '3' ? 55 \
+ : (_) == '4' ? 56 \
+ : (_) == '5' ? 57 \
+ : (_) == '6' ? 58 \
+ : (_) == '7' ? 59 \
+ : (_) == '8' ? 60 \
+ : (_) == '9' ? 61 \
+ : (_) == '+' ? 62 \
+ : (_) == '/' ? 63 \
+ : -1)
+
+static const signed char b64[0x100] =
+{
+ B64 (0), B64 (1), B64 (2), B64 (3),
+ B64 (4), B64 (5), B64 (6), B64 (7),
+ B64 (8), B64 (9), B64 (10), B64 (11),
+ B64 (12), B64 (13), B64 (14), B64 (15),
+ B64 (16), B64 (17), B64 (18), B64 (19),
+ B64 (20), B64 (21), B64 (22), B64 (23),
+ B64 (24), B64 (25), B64 (26), B64 (27),
+ B64 (28), B64 (29), B64 (30), B64 (31),
+ B64 (32), B64 (33), B64 (34), B64 (35),
+ B64 (36), B64 (37), B64 (38), B64 (39),
+ B64 (40), B64 (41), B64 (42), B64 (43),
+ B64 (44), B64 (45), B64 (46), B64 (47),
+ B64 (48), B64 (49), B64 (50), B64 (51),
+ B64 (52), B64 (53), B64 (54), B64 (55),
+ B64 (56), B64 (57), B64 (58), B64 (59),
+ B64 (60), B64 (61), B64 (62), B64 (63),
+ B64 (64), B64 (65), B64 (66), B64 (67),
+ B64 (68), B64 (69), B64 (70), B64 (71),
+ B64 (72), B64 (73), B64 (74), B64 (75),
+ B64 (76), B64 (77), B64 (78), B64 (79),
+ B64 (80), B64 (81), B64 (82), B64 (83),
+ B64 (84), B64 (85), B64 (86), B64 (87),
+ B64 (88), B64 (89), B64 (90), B64 (91),
+ B64 (92), B64 (93), B64 (94), B64 (95),
+ B64 (96), B64 (97), B64 (98), B64 (99),
+ B64 (100), B64 (101), B64 (102), B64 (103),
+ B64 (104), B64 (105), B64 (106), B64 (107),
+ B64 (108), B64 (109), B64 (110), B64 (111),
+ B64 (112), B64 (113), B64 (114), B64 (115),
+ B64 (116), B64 (117), B64 (118), B64 (119),
+ B64 (120), B64 (121), B64 (122), B64 (123),
+ B64 (124), B64 (125), B64 (126), B64 (127),
+ B64 (128), B64 (129), B64 (130), B64 (131),
+ B64 (132), B64 (133), B64 (134), B64 (135),
+ B64 (136), B64 (137), B64 (138), B64 (139),
+ B64 (140), B64 (141), B64 (142), B64 (143),
+ B64 (144), B64 (145), B64 (146), B64 (147),
+ B64 (148), B64 (149), B64 (150), B64 (151),
+ B64 (152), B64 (153), B64 (154), B64 (155),
+ B64 (156), B64 (157), B64 (158), B64 (159),
+ B64 (160), B64 (161), B64 (162), B64 (163),
+ B64 (164), B64 (165), B64 (166), B64 (167),
+ B64 (168), B64 (169), B64 (170), B64 (171),
+ B64 (172), B64 (173), B64 (174), B64 (175),
+ B64 (176), B64 (177), B64 (178), B64 (179),
+ B64 (180), B64 (181), B64 (182), B64 (183),
+ B64 (184), B64 (185), B64 (186), B64 (187),
+ B64 (188), B64 (189), B64 (190), B64 (191),
+ B64 (192), B64 (193), B64 (194), B64 (195),
+ B64 (196), B64 (197), B64 (198), B64 (199),
+ B64 (200), B64 (201), B64 (202), B64 (203),
+ B64 (204), B64 (205), B64 (206), B64 (207),
+ B64 (208), B64 (209), B64 (210), B64 (211),
+ B64 (212), B64 (213), B64 (214), B64 (215),
+ B64 (216), B64 (217), B64 (218), B64 (219),
+ B64 (220), B64 (221), B64 (222), B64 (223),
+ B64 (224), B64 (225), B64 (226), B64 (227),
+ B64 (228), B64 (229), B64 (230), B64 (231),
+ B64 (232), B64 (233), B64 (234), B64 (235),
+ B64 (236), B64 (237), B64 (238), B64 (239),
+ B64 (240), B64 (241), B64 (242), B64 (243),
+ B64 (244), B64 (245), B64 (246), B64 (247),
+ B64 (248), B64 (249), B64 (250), B64 (251),
+ B64 (252), B64 (253), B64 (254), B64 (255)
+};
+
+/* Return true if C is a character of the base64 alphabet, i.e. its entry
+   in the b64 table is something other than -1.  Values of 0x100 or above
+   are rejected before the table is consulted, so the lookup never reads
+   outside the table.  */
+
+static bool
+is_base64_char (unsigned int c)
+{
+  if (c >= 0x100)
+    return false;
+
+  return b64[c] != -1;
+}
+
+/* Decode the base64 characters held in B and append the resulting bytes
+   via FRAG_APPEND_1_CHAR.  LEN is the number of leading entries of B that
+   hold characters to decode and must be greater than one: two characters
+   yield one byte, three yield two bytes and four yield three bytes.
+   Entries of B at index LEN or beyond are never read.  */
+
+static void
+decode_base64_and_append (unsigned int b[4], int len)
+{
+  gas_assert (len > 1);
+
+  /* The first byte is all six bits of b[0] and the top two of b[1].  */
+  const int first = b64[b[0]];
+  const int second = b64[b[1]];
+
+  FRAG_APPEND_1_CHAR ((first << 2) | (second >> 4));
+
+  if (len != 2)
+    {
+      /* The second byte is the low four bits of b[1] and the top four
+         of b[2].  */
+      const int third = b64[b[2]];
+
+      FRAG_APPEND_1_CHAR (((second << 4) & 0xf0) | (third >> 2));
+
+      if (len != 3)
+        {
+          /* The third byte is the low two bits of b[2] and all of b[3].  */
+          FRAG_APPEND_1_CHAR (((third << 6) & 0xc0) | b64[b[3]]);
+        }
+    }
+}
+
+/* Handler for the .base64 pseudo-op.
+
+   Accept one or more comma separated, base64 encoded strings.  Decode them
+   and store them at the current point in the current section.  The strings
+   must be enclosed in double quotes.  Line breaks and escaped characters
+   are not allowed.  Only the characters "A-Za-z0-9+/" are accepted inside
+   the string, optionally followed by '=' padding at the very end.
+
+   The string should be a multiple of four characters in length.  If the
+   encoded data does not fit this requirement, one or two '=' characters
+   may be used at the end as padding.  A string whose final block holds
+   only two or three characters is still decoded, but a warning is issued;
+   a final block of fewer than two characters is an error.
+
+   On any error a diagnostic is issued via as_bad and the directive is
+   abandoned; blocks decoded before the error have already been appended.
+
+   DUMMY is unused.  */
+
+void
+s_base64 (int dummy ATTRIBUTE_UNUSED)
+{
+  unsigned int c;
+
+  /* If we have been switched into the abs_section then we
+     will not have an obstack onto which we can hang strings.  */
+  if (now_seg == absolute_section)
+    {
+      as_bad (_("base64 strings must be placed into a section"));
+      ignore_rest_of_line ();
+      return;
+    }
+
+  if (is_it_end_of_statement ())
+    {
+      as_bad (_("a string must follow the .base64 pseudo-op"));
+      return;
+    }
+
+#ifdef md_flush_pending_output
+  md_flush_pending_output ();
+#endif
+
+#ifdef md_cons_align
+  md_cons_align (1);
+#endif
+
+  /* Each iteration of this loop consumes one double-quoted string.  */
+  do
+    {
+      SKIP_ALL_WHITESPACE ();
+
+      c = * input_line_pointer ++;
+
+      if (c != '"')
+        {
+          as_bad (_("expected double quote enclosed string as argument to .base64 pseudo-op"));
+          ignore_rest_of_line ();
+          return;
+        }
+
+      /* Read a block of four base64 encoded characters.  */
+      int i;
+      unsigned int b[4];
+      /* Set once '=' has been seen in the current string.  It is not
+         reset by the jump back to LOOP, so padding can only appear in
+         the final block of a string.  */
+      bool seen_equals = false;
+
+    loop:
+      for (i = 0; i < 4; i++)
+        {
+          c = * input_line_pointer ++;
+
+          if (c >= 256 || is_end_of_line [c])
+            {
+              as_bad (_("end of line encountered inside .base64 string"));
+              ignore_rest_of_line ();
+              return;
+            }
+
+          if (c == '"')
+            {
+              /* We allow this.  But only if there were enough
+                 characters to form a valid base64 encoding.  */
+              if (i > 1)
+                {
+                  as_warn (_(".base64 string terminated early"));
+                  /* Leave the quote for the terminator check below.  */
+                  -- input_line_pointer;
+                  break;
+                }
+
+              as_bad (_(".base64 string terminated unexpectedly"));
+              ignore_rest_of_line ();
+              return;
+            }
+
+          if (seen_equals && c != '=')
+            {
+              as_bad (_("equals character only allowed at end of .base64 string"));
+              ignore_rest_of_line ();
+              return;
+            }
+
+          if (c == '=')
+            {
+              if (i == 0)
+                {
+                  as_bad (_("the equals character cannot start a block of four base64 encoded bytes"));
+                  ignore_rest_of_line ();
+                  return;
+                }
+              else if (i == 1)
+                {
+                  as_bad (_("the equals character cannot be the second character in a block of four base64 encoded bytes"));
+                  ignore_rest_of_line ();
+                  return;
+                }
+
+              seen_equals = true;
+            }
+          else if (! is_base64_char (c))
+            {
+              if (ISPRINT (c))
+                as_bad (_("invalid character '%c' found inside .base64 string"), c);
+              else
+                as_bad (_("invalid character %#x found inside .base64 string"), c);
+              ignore_rest_of_line ();
+              return;
+            }
+
+          b[i] = c;
+        }
+
+      /* Discard any trailing '=' padding so that only real base64
+         characters reach the decoder.  This has to be done for a block
+         that was terminated early as well as for a complete one: for a
+         string such as "QU=" the block holds three entries, the last of
+         which is '=', and '=' has no value in the decoding table.  The
+         checks above guarantee that the first two entries are never '=',
+         so at least two characters always remain.  */
+      while (i > 2 && b[i - 1] == '=')
+        -- i;
+
+      /* We have a block of up to four valid base64 encoded bytes.  */
+      decode_base64_and_append (b, i);
+
+      /* Check the next character.  */
+      c = * input_line_pointer ++;
+
+      if (is_base64_char (c))
+        {
+          if (seen_equals)
+            {
+              as_bad (_("no base64 characters expected after '=' padding characters"));
+              ignore_rest_of_line ();
+              return;
+            }
+
+          /* More data follows: push the character back and decode the
+             next block of the same string.  */
+          -- input_line_pointer;
+          goto loop;
+        }
+      else if (c != '"')
+        {
+          as_bad (_(".base64 string must have a terminating double quote character"));
+          ignore_rest_of_line ();
+          return;
+        }
+
+      SKIP_ALL_WHITESPACE ();
+
+      c = * input_line_pointer ++;
+    }
+  while (c == ',');
+
+  /* Push back the character that ended the list of strings.  */
+  -- input_line_pointer;
+
+  demand_empty_rest_of_line ();
+}
+
+
/* Worker to do .ascii etc statements.
Reads 0 or more ',' separated, double-quoted strings.
Caller should have checked need_pass_2 is FALSE because we don't
diff --git a/gas/read.h b/gas/read.h
index e9dee27..9908d44 100644
--- a/gas/read.h
+++ b/gas/read.h
@@ -215,6 +215,7 @@ extern void s_nops (int);
extern void s_stab (int what);
extern void s_struct (int);
extern void s_text (int);
+extern void s_base64 (int);
extern void stringer (int append_zero);
extern void s_xstab (int what);
extern void s_rva (int);
diff --git a/gas/testsuite/gas/all/base64-bad.d b/gas/testsuite/gas/all/base64-bad.d
new file mode 100644
index 0000000..d613b3e
--- /dev/null
+++ b/gas/testsuite/gas/all/base64-bad.d
@@ -0,0 +1,5 @@
+#name: .base64 errors test
+#source: base64-bad.s
+#error_output: base64-bad.l
+# The Z80 backend has its own error message for an unterminated string.
+#notarget: z80-*
diff --git a/gas/testsuite/gas/all/base64-bad.l b/gas/testsuite/gas/all/base64-bad.l
new file mode 100644
index 0000000..a860a1d
--- /dev/null
+++ b/gas/testsuite/gas/all/base64-bad.l
@@ -0,0 +1,11 @@
+.*: Assembler messages:
+.*: Warning: end of file in string; '"' inserted
+.*2: Warning: .base64 string terminated early
+.*3: Error: .base64 string terminated unexpectedly
+.*4: Error: invalid character ' ' found inside .base64 string
+.*5: Error: the equals character cannot start a block of four base64 encoded bytes
+.*6: Error: the equals character cannot be the second character in a block of four base64 encoded bytes
+.*7: Error: no base64 characters expected after '=' padding characters
+.*8: Error: junk at end of line, first unrecognized character is `"'
+.*9: Error: expected double quote enclosed string as argument to .base64 pseudo-op
+.*10: Error: end of line encountered inside .base64 string
diff --git a/gas/testsuite/gas/all/base64-bad.s b/gas/testsuite/gas/all/base64-bad.s
new file mode 100644
index 0000000..99c3bc2
--- /dev/null
+++ b/gas/testsuite/gas/all/base64-bad.s
@@ -0,0 +1,10 @@
+ .data
+ .base64 "1234567"
+ .base64 "NotInASection"
+ .base64 "invalid character"
+ .base64 "===="
+ .base64 "A==="
+ .base64 "in==side"
+ .base64 "miss" "comma"
+ .base64 no quotes
+ .base64 "noclosingcomma
diff --git a/gas/testsuite/gas/all/base64.d b/gas/testsuite/gas/all/base64.d
new file mode 100644
index 0000000..11410a7
--- /dev/null
+++ b/gas/testsuite/gas/all/base64.d
@@ -0,0 +1,20 @@
+#readelf: -x .data
+#name : .base64 test
+# Weirdly the binutils/gas test harness does not support a binary
+# dumping program like od. The closest we can get is readelf,
+# but of course this only works for ELF binaries. Hence the
+# restriction below. In theory though the .base64 directive
+# will work for all output formats, even if we cannot test it.
+#target: [is_elf_format]
+
+Hex dump of section '.data':
+ 0x00000000 68656c6c 6f20776f 726c640a 70616464 hello world.padd
+ 0x00000010 696e6720 68657265 0a454c46 02010103 ing here.ELF....
+ 0x00000020 00000000 00000000 02003e00 01000000 ..........>.....
+ 0x00000030 50d34000 00000000 40000000 00000000 P.@.....@.......
+ 0x00000040 d8204b10 00000000 00000000 40003800 . K.........@.8.
+ 0x00000050 0e004000 2c002b00 06000000 04000000 ..@.,.+.........
+ 0x00000060 40000000 00000000 40004000 00000000 @.......@.@.....
+ 0x00000070 40004000 00000000 10030000 00000000 @.@.............
+ 0x00000080 10030000 00000000 08000000 00000000 ................
+#pass
diff --git a/gas/testsuite/gas/all/base64.s b/gas/testsuite/gas/all/base64.s
new file mode 100644
index 0000000..d176467
--- /dev/null
+++ b/gas/testsuite/gas/all/base64.s
@@ -0,0 +1,8 @@
+ .data
+ .global hello
+hello:
+ .base64 "aGVsbG8gd29ybGQK", "cGFkZGluZyBoZXJlCg=="
+
+ .global very_long
+very_long:
+ .base64 "RUxGAgEBAwAAAAAAAAAAAgA+AAEAAABQ00AAAAAAAEAAAAAAAAAA2CBLEAAAAAAAAAAAQAA4AA4AQAAsACsABgAAAAQAAABAAAAAAAAAAEAAQAAAAAAAQABAAAAAAAAQAwAAAAAAABADAAAAAAAACAAAAAAAAAAAAAAAAAAAAAA="
diff --git a/gas/testsuite/gas/all/gas.exp b/gas/testsuite/gas/all/gas.exp
index d262944..32363c5 100644
--- a/gas/testsuite/gas/all/gas.exp
+++ b/gas/testsuite/gas/all/gas.exp
@@ -468,6 +468,9 @@ gas_test_error "weakref2.s" "" "e: would close weakref loop: e => a => b => c =>
gas_test_error "weakref3.s" "" "a: would close weakref loop: a => b => c => d => e => a"
gas_test_error "weakref4.s" "" "is already defined"
+run_dump_test base64
+run_dump_test base64-bad
+
run_dump_test string
if [is_elf_format] {
run_dump_test none