diff options
author | David Faust <david.faust@oracle.com> | 2023-11-16 09:35:03 -0800 |
---|---|---|
committer | David Faust <david.faust@oracle.com> | 2023-11-17 13:22:27 -0800 |
commit | e5d6f72eb3db66939f0d14e78056e118e83aca37 (patch) | |
tree | 7c573f152f6cc08a17865deb7b305250a0424042 /gas | |
parent | 413366203156a0483d06a97b3f02b64ba6a215cc (diff) | |
download | gdb-e5d6f72eb3db66939f0d14e78056e118e83aca37.zip gdb-e5d6f72eb3db66939f0d14e78056e118e83aca37.tar.gz gdb-e5d6f72eb3db66939f0d14e78056e118e83aca37.tar.bz2 |
bpf: avoid creating wrong symbols while parsing
To support the "pseudo-C" asm dialect in BPF, the BPF parser must often
attempt multiple different templates for a single instruction. In some
cases this can cause the parser to incorrectly parse part of the
instruction opcode as an expression, which leads to the creation of a
new undefined symbol.
Once the parser recognizes the error, the expression is discarded and it
tries again with a new instruction template. However, symbols created
during the process are added to the symbol table and are not removed
even if the expression is discarded.
This is a problem for BPF: generally the assembled object will be loaded
directly to the Linux kernel, without being linked. These erroneous
parser-created symbols are rejected by the kernel BPF loader, and the
entire object is refused.
This patch remedies the issue by tentatively creating symbols while
parsing instruction operands, and storing them in a temporary list
rather than immediately inserting them into the symbol table. Later,
after the parser is sure that it has correctly parsed the instruction,
those symbols are committed to the real symbol table.
This approach is modeled directly after Jan Beulich's patch for RISC-V:
commit 7a29ee290307087e1749ce610207e93a15d0b78d
RISC-V: adjust logic to avoid register name symbols
Many thanks to Jan for recognizing the problem as similar, and pointing
me to that patch.
gas/
* config/tc-bpf.c (parsing_insn_operands): New.
(parse_expression): Set it here.
(deferred_sym_rootP, deferred_sym_lastP): New.
(orphan_sym_rootP, orphan_sym_lastP): New.
(bpf_parse_name): New.
(parse_error): Clear deferred symbol list on error.
(md_assemble): Clear parsing_insn_operands. Commit deferred
symbols to symbol table on successful parse.
* config/tc-bpf.h (md_parse_name): Define to...
(bpf_parse_name): ...this. New prototype.
* testsuite/gas/bpf/asm-extra-sym-1.s: New test source.
* testsuite/gas/bpf/asm-extra-sym-1.d: New test.
* testsuite/gas/bpf/bpf.exp: Run new test.
Diffstat (limited to 'gas')
-rw-r--r-- | gas/config/tc-bpf.c | 92 | ||||
-rw-r--r-- | gas/config/tc-bpf.h | 4 | ||||
-rw-r--r-- | gas/testsuite/gas/bpf/asm-extra-sym-1.d | 7 | ||||
-rw-r--r-- | gas/testsuite/gas/bpf/asm-extra-sym-1.s | 1 | ||||
-rw-r--r-- | gas/testsuite/gas/bpf/bpf.exp | 3 |
5 files changed, 107 insertions, 0 deletions
diff --git a/gas/config/tc-bpf.c b/gas/config/tc-bpf.c index fd4144a..3122f80 100644 --- a/gas/config/tc-bpf.c +++ b/gas/config/tc-bpf.c @@ -1223,6 +1223,7 @@ add_relaxed_insn (struct bpf_insn *insn, expressionS *exp) See md_operand below to see how exp_parse_failed is used. */ static int exp_parse_failed = 0; +static bool parsing_insn_operands = false; static char * parse_expression (char *s, expressionS *exp) @@ -1230,6 +1231,9 @@ parse_expression (char *s, expressionS *exp) char *saved_input_line_pointer = input_line_pointer; char *saved_s = s; + /* Wake up bpf_parse_name before the call to expression (). */ + parsing_insn_operands = true; + exp_parse_failed = 0; input_line_pointer = s; expression (exp); @@ -1251,6 +1255,71 @@ parse_expression (char *s, expressionS *exp) return s; } +/* Symbols created by this parse, but not yet committed to the real + symbol table. */ +static symbolS *deferred_sym_rootP; +static symbolS *deferred_sym_lastP; + +/* Symbols discarded by a previous parse. Symbols cannot easily be freed + after creation, so try to recycle. */ +static symbolS *orphan_sym_rootP; +static symbolS *orphan_sym_lastP; + +/* Implement md_parse_name hook. Handles any symbol found in an expression. + This allows us to tentatively create symbols, before we know for sure + whether the parser is using the correct template for an instruction. + If we end up keeping the instruction, the deferred symbols are committed + to the real symbol table. This approach is modeled after the riscv port. */ + +bool +bpf_parse_name (const char *name, expressionS *exp, enum expr_mode mode) +{ + symbolS *sym; + + /* If we aren't currently parsing an instruction, don't do anything. + This prevents tampering with operands to directives. */ + if (!parsing_insn_operands) + return false; + + gas_assert (mode == expr_normal); + + if (symbol_find (name) != NULL) + return false; + + for (sym = deferred_sym_rootP; sym; sym = symbol_next (sym)) + if (strcmp (name, S_GET_NAME (sym)) == 0) + break; + + /* Tentatively create a symbol. */ + if (!sym) + { + /* See if we can reuse a symbol discarded by a previous parse. + This may be quite common, for example when trying multiple templates + for an instruction with the first reference to a valid symbol. */ + for (sym = orphan_sym_rootP; sym; sym = symbol_next (sym)) + if (strcmp (name, S_GET_NAME (sym)) == 0) + { + symbol_remove (sym, &orphan_sym_rootP, &orphan_sym_lastP); + break; + } + + if (!sym) + sym = symbol_create (name, undefined_section, &zero_address_frag, 0); + + /* Add symbol to the deferred list. If we commit to the isntruction, + then the symbol will be inserted into to the real symbol table at + that point (in md_assemble). */ + symbol_append (sym, deferred_sym_lastP, &deferred_sym_rootP, + &deferred_sym_lastP); + } + + exp->X_op = O_symbol; + exp->X_add_symbol = sym; + exp->X_add_number = 0; + + return true; +} + /* Parse a BPF register name and return the corresponding register number. Return NULL in case of parse error, or a pointer to the first character in S that is not part of the register name. */ @@ -1317,6 +1386,16 @@ parse_error (int length, const char *fmt, ...) va_end (args); partial_match_length = length; } + + /* Discard deferred symbols from the failed parse. They may potentially + be reused in the future from the orphan list. */ + while (deferred_sym_rootP) + { + symbolS *sym = deferred_sym_rootP; + symbol_remove (sym, &deferred_sym_rootP, &deferred_sym_lastP); + symbol_append (sym, orphan_sym_lastP, &orphan_sym_rootP, + &orphan_sym_lastP); + } } /* Assemble a machine instruction in STR and emit the frags/bytes it @@ -1606,6 +1685,10 @@ md_assemble (char *str ATTRIBUTE_UNUSED) } } + /* Mark that we are no longer parsing an instruction, bpf_parse_name does + not interfere with symbols in e.g. assembler directives. */ + parsing_insn_operands = false; + if (opcode == NULL) { as_bad (_("unrecognized instruction `%s'"), str); @@ -1622,6 +1705,15 @@ md_assemble (char *str ATTRIBUTE_UNUSED) #undef PARSE_ERROR + /* Commit any symbols created while parsing the instruction. */ + while (deferred_sym_rootP) + { + symbolS *sym = deferred_sym_rootP; + symbol_remove (sym, &deferred_sym_rootP, &deferred_sym_lastP); + symbol_append (sym, symbol_lastP, &symbol_rootP, &symbol_lastP); + symbol_table_insert (sym); + } + /* Generate the frags and fixups for the parsed instruction. */ if (do_relax && isa_spec >= BPF_V4 && insn.is_relaxable) { diff --git a/gas/config/tc-bpf.h b/gas/config/tc-bpf.h index 9fb71ed..06096ef 100644 --- a/gas/config/tc-bpf.h +++ b/gas/config/tc-bpf.h @@ -51,6 +51,10 @@ a jump to offset 0 means jump to the next instruction. */ #define md_single_noop_insn "ja 0" +#define md_parse_name(name, exp, mode, c) \ + bpf_parse_name (name, exp, mode) +bool bpf_parse_name (const char *, struct expressionS *, enum expr_mode); + #define TC_EQUAL_IN_INSN(c, s) bpf_tc_equal_in_insn ((c), (s)) extern bool bpf_tc_equal_in_insn (int, char *); diff --git a/gas/testsuite/gas/bpf/asm-extra-sym-1.d b/gas/testsuite/gas/bpf/asm-extra-sym-1.d new file mode 100644 index 0000000..113750d --- /dev/null +++ b/gas/testsuite/gas/bpf/asm-extra-sym-1.d @@ -0,0 +1,7 @@ +#as: -EL -mdialect=pseudoc +#nm: --numeric-sort +#source: asm-extra-sym-1.s +#name: BPF pseudoc no extra symbols 1 + +# Note: there should be no output from nm. +# Previously a bug in the BPF parser created an UND '*' symbol. diff --git a/gas/testsuite/gas/bpf/asm-extra-sym-1.s b/gas/testsuite/gas/bpf/asm-extra-sym-1.s new file mode 100644 index 0000000..2cfa605 --- /dev/null +++ b/gas/testsuite/gas/bpf/asm-extra-sym-1.s @@ -0,0 +1 @@ + r2 = *(u32*)(r1 + 8) diff --git a/gas/testsuite/gas/bpf/bpf.exp b/gas/testsuite/gas/bpf/bpf.exp index 80f5a1d..fcbeccd 100644 --- a/gas/testsuite/gas/bpf/bpf.exp +++ b/gas/testsuite/gas/bpf/bpf.exp @@ -72,4 +72,7 @@ if {[istarget bpf*-*-*]} { run_dump_test disp16-overflow-relax run_dump_test disp32-overflow run_dump_test imm32-overflow + + # Test that parser does not create undefined symbols + run_dump_test asm-extra-sym-1 } |