aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Clifton <nickc@redhat.com>2021-11-18 16:48:19 +0000
committerNick Clifton <nickc@redhat.com>2021-11-18 16:48:19 +0000
commit578c64a45a0e47fd0af53c77339ec0c26ef4874a (patch)
treeb95f61afc34286ad08556eb14848e9ba2d0123a1
parent76eb8ef1ce470ca71b10fae721e32d49998d87b9 (diff)
downloadgdb-578c64a45a0e47fd0af53c77339ec0c26ef4874a.zip
gdb-578c64a45a0e47fd0af53c77339ec0c26ef4874a.tar.gz
gdb-578c64a45a0e47fd0af53c77339ec0c26ef4874a.tar.bz2
Add multibyte character warning option to the assembler.
* as.c (parse_args): Add support for --multibyte-handling. * as.h (multibyte_handling): Declare. * app.c (scan_for_multibyte_characters): New function. (do_scrub_chars): Call the new function if multibyte warning is enabled. * input-scrub.c (input_scrub_next_buffer): Call the multibyte scanning function if multibyte warnings are enabled. * symbols.c (struct symbol_flags): Add multibyte_warned bit. (symbol_init): Call the multibyte scanning function if multibyte symbol warnings are enabled. (S_SET_SEGMENT): Likewise. * NEWS: Mention the new feature. * doc/as.texi: Document the new feature. * testsuite/gas/all/multibyte.s: New test source file. * testsuite/gas/all/multibyte1.d: New test driver file. * testsuite/gas/all/multibyte1.l: New test expected output. * testsuite/gas/all/multibyte2.d: New test driver file. * testsuite/gas/all/multibyte2.l: New test expected output. * testsuite/gas/all/gas.exp: Run the new tests.
-rw-r--r--gas/ChangeLog22
-rw-r--r--gas/NEWS8
-rw-r--r--gas/app.c54
-rw-r--r--gas/as.c23
-rw-r--r--gas/as.h9
-rw-r--r--gas/doc/as.texi30
-rw-r--r--gas/input-scrub.c5
-rw-r--r--gas/symbols.c34
-rw-r--r--gas/testsuite/gas/all/gas.exp2
-rw-r--r--gas/testsuite/gas/all/multibyte.s8
-rw-r--r--gas/testsuite/gas/all/multibyte1.d3
-rw-r--r--gas/testsuite/gas/all/multibyte1.l12
-rw-r--r--gas/testsuite/gas/all/multibyte2.d3
-rw-r--r--gas/testsuite/gas/all/multibyte2.l2
14 files changed, 205 insertions, 10 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog
index e9761e9..8732088 100644
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@@ -1,3 +1,25 @@
+2021-11-18 Nick Clifton <nickc@redhat.com>
+
+ * as.c (parse_args): Add support for --multibyte-handling.
+ * as.h (multibyte_handling): Declare.
+ * app.c (scan_for_multibyte_characters): New function.
+ (do_scrub_chars): Call the new function if multibyte warning is
+ enabled.
+ * input-scrub.c (input_scrub_next_buffer): Call the multibyte
+ scanning function if multibyte warnings are enabled.
+ * symbols.c (struct symbol_flags): Add multibyte_warned bit.
+ (symbol_init): Call the multibyte scanning function if multibyte
+ symbol warnings are enabled.
+ (S_SET_SEGMENT): Likewise.
+ * NEWS: Mention the new feature.
+ * doc/as.texi: Document the new feature.
+ * testsuite/gas/all/multibyte.s: New test source file.
+ * testsuite/gas/all/multibyte1.d: New test driver file.
+ * testsuite/gas/all/multibyte1.l: New test expected output.
+ * testsuite/gas/all/multibyte2.d: New test driver file.
+ * testsuite/gas/all/multibyte2.l: New test expected output.
+ * testsuite/gas/all/gas.exp: Run the new tests.
+
2021-11-15 Eric Botcazou <ebotcazou@adacore.com>
* doc/as.texi (File): Update description of .file 0 directive.
diff --git a/gas/NEWS b/gas/NEWS
index aac7522..4288e62 100644
--- a/gas/NEWS
+++ b/gas/NEWS
@@ -13,6 +13,14 @@
* Add support for Scalable Matrix Extension (SME) for AArch64.
+* The --multibyte-handling=[allow|warn|warn-sym-only] option tells the
+ assembler what to do when it encounters multibyte characters in the input. The
+ default is to allow them. Setting the option to "warn" will generate a
+ warning message whenever any multibyte character is encountered. Setting the
+ option to "warn-sym-only" will make the assembler generate a warning whenever a
+ symbol is defined containing multibyte characters. (References to undefined
+ symbols will not generate warnings).
+
* Outputs of .ds.x directive and .tfloat directive with hex input from
x86 assembler have been reduced from 12 bytes to 10 bytes to match the
output of .tfloat directive.
diff --git a/gas/app.c b/gas/app.c
index 712bffe..0c15b96 100644
--- a/gas/app.c
+++ b/gas/app.c
@@ -345,6 +345,55 @@ process_escape (int ch)
}
}
+/* Upper bound on the number of per-byte multibyte warnings issued by
+   scan_for_multibyte_characters.  The count is kept across calls; when it
+   reaches this limit a final "suppressed" notice is printed and all later
+   warning scans return immediately.  */
+#define MULTIBYTE_WARN_COUNT_LIMIT 10
+static unsigned int multibyte_warn_count = 0;
+
+/* Scan the bytes in the range [START, END) for any whose value is greater
+   than 0x7f.
+
+   If WARN is false the function returns true as soon as the first such
+   byte is seen and produces no diagnostics.
+
+   If WARN is true a warning is issued for every such byte, until a total
+   of MULTIBYTE_WARN_COUNT_LIMIT warnings has been issued across all calls.
+   At that point a "further warnings suppressed" message is emitted and
+   scanning stops.
+
+   Returns true if at least one byte above 0x7f was found, false otherwise.
+   Also returns false, without scanning, if the range is empty or if WARN
+   is true and the warning limit has already been reached.  */
+
+bool
+scan_for_multibyte_characters (const unsigned char * start,
+                               const unsigned char * end,
+                               bool warn)
+{
+  if (end <= start)
+    return false;
+
+  /* The counter stops at exactly MULTIBYTE_WARN_COUNT_LIMIT when the
+     suppression notice is printed, so the comparison must be >=.  Using >
+     would let the next call through, push the counter past the limit and
+     then warn about every remaining byte in that buffer.  */
+  if (warn && multibyte_warn_count >= MULTIBYTE_WARN_COUNT_LIMIT)
+    return false;
+
+  bool found = false;
+
+  while (start < end)
+    {
+      unsigned char c;
+
+      /* Bytes with the top bit clear are of no interest.  */
+      if ((c = * start++) <= 0x7f)
+        continue;
+
+      /* Callers that only want a yes/no answer can stop at the first hit.  */
+      if (!warn)
+        return true;
+
+      found = true;
+
+      const char * filename;
+      unsigned int lineno;
+
+      /* Pick the most specific message that the location information
+         returned by as_where allows.  */
+      filename = as_where (& lineno);
+      if (filename == NULL)
+        as_warn (_("multibyte character (%#x) encountered in input"), c);
+      else if (lineno == 0)
+        as_warn (_("multibyte character (%#x) encountered in %s"), c, filename);
+      else
+        as_warn (_("multibyte character (%#x) encountered in %s at or near line %u"), c, filename, lineno);
+
+      if (++ multibyte_warn_count == MULTIBYTE_WARN_COUNT_LIMIT)
+        {
+          as_warn (_("further multibyte character warnings suppressed"));
+          break;
+        }
+    }
+
+  return found;
+}
+
/* This function is called to process input characters. The GET
parameter is used to retrieve more input characters. GET should
set its parameter to point to a buffer, and return the length of
@@ -463,6 +512,11 @@ do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
return 0;
from = input_buffer;
fromend = from + fromlen;
+
+ if (multibyte_handling == multibyte_warn)
+ (void) scan_for_multibyte_characters ((const unsigned char *) from,
+ (const unsigned char* ) fromend,
+ true /* Generate warnings. */);
}
while (1)
diff --git a/gas/as.c b/gas/as.c
index 7de8af2..8af04aa 100644
--- a/gas/as.c
+++ b/gas/as.c
@@ -474,7 +474,7 @@ parse_args (int * pargc, char *** pargv)
OPTION_DEBUG_PREFIX_MAP,
OPTION_DEFSYM,
OPTION_LISTING_LHS_WIDTH,
- OPTION_LISTING_LHS_WIDTH2,
+ OPTION_LISTING_LHS_WIDTH2, /* = STD_BASE + 10 */
OPTION_LISTING_RHS_WIDTH,
OPTION_LISTING_CONT_LINES,
OPTION_DEPFILE,
@@ -484,7 +484,7 @@ parse_args (int * pargc, char *** pargv)
OPTION_GDWARF_3,
OPTION_GDWARF_4,
OPTION_GDWARF_5,
- OPTION_GDWARF_SECTIONS,
+ OPTION_GDWARF_SECTIONS, /* = STD_BASE + 20 */
OPTION_GDWARF_CIE_VERSION,
OPTION_STRIP_LOCAL_ABSOLUTE,
OPTION_TRADITIONAL_FORMAT,
@@ -494,7 +494,7 @@ parse_args (int * pargc, char *** pargv)
OPTION_NOEXECSTACK,
OPTION_SIZE_CHECK,
OPTION_ELF_STT_COMMON,
- OPTION_ELF_BUILD_NOTES,
+ OPTION_ELF_BUILD_NOTES, /* = STD_BASE + 30 */
OPTION_SECTNAME_SUBST,
OPTION_ALTERNATE,
OPTION_AL,
@@ -503,7 +503,8 @@ parse_args (int * pargc, char *** pargv)
OPTION_WARN_FATAL,
OPTION_COMPRESS_DEBUG,
OPTION_NOCOMPRESS_DEBUG,
- OPTION_NO_PAD_SECTIONS /* = STD_BASE + 40 */
+ OPTION_NO_PAD_SECTIONS,
+ OPTION_MULTIBYTE_HANDLING /* = STD_BASE + 40 */
/* When you add options here, check that they do
not collide with OPTION_MD_BASE. See as.h. */
};
@@ -581,6 +582,7 @@ parse_args (int * pargc, char *** pargv)
,{"target-help", no_argument, NULL, OPTION_TARGET_HELP}
,{"traditional-format", no_argument, NULL, OPTION_TRADITIONAL_FORMAT}
,{"warn", no_argument, NULL, OPTION_WARN}
+ ,{"multibyte-handling", required_argument, NULL, OPTION_MULTIBYTE_HANDLING}
};
/* Construct the option lists from the standard list and the target
@@ -683,6 +685,19 @@ parse_args (int * pargc, char *** pargv)
flag_traditional_format = 1;
break;
+        case OPTION_MULTIBYTE_HANDLING:
+          /* Translate the argument of --multibyte-handling into the global
+             multibyte_handling setting.  Both "warn-sym-only" and
+             "warn_sym_only" spellings are accepted.  Anything else is a
+             fatal error.  */
+          if (strcmp (optarg, "allow") == 0)
+            multibyte_handling = multibyte_allow;
+          else if (strcmp (optarg, "warn") == 0)
+            multibyte_handling = multibyte_warn;
+          else if (strcmp (optarg, "warn-sym-only") == 0)
+            multibyte_handling = multibyte_warn_syms;
+          else if (strcmp (optarg, "warn_sym_only") == 0)
+            multibyte_handling = multibyte_warn_syms;
+          else
+            /* Name the option exactly as the user has to type it.  */
+            as_fatal (_("unexpected argument to --multibyte-handling: '%s'"), optarg);
+          break;
+
case OPTION_VERSION:
/* This output is intended to follow the GNU standards document. */
printf (_("GNU assembler %s\n"), BFD_VERSION_STRING);
diff --git a/gas/as.h b/gas/as.h
index f3f12fb..89dae1b 100644
--- a/gas/as.h
+++ b/gas/as.h
@@ -344,6 +344,14 @@ COMMON int linkrelax;
COMMON int do_not_pad_sections_to_alignment;
+/* How multibyte characters in the input are treated.  The value is
+   selected by the --multibyte-handling command line option, which is
+   parsed in parse_args.  */
+enum multibyte_input_handling
+{
+ /* Selected by "allow", which the documentation describes as the
+    default: multibyte characters are accepted without complaint.  */
+ multibyte_allow = 0,
+ /* Selected by "warn": do_scrub_chars and input_scrub_next_buffer pass
+    their input buffers to scan_for_multibyte_characters with warnings
+    enabled.  */
+ multibyte_warn,
+ /* Selected by "warn-sym-only": symbol_init and S_SET_SEGMENT warn when
+    a non-local symbol placed in a section other than undefined_section
+    has a name containing multibyte characters.  */
+ multibyte_warn_syms
+};
+/* The handling mode currently in effect.  */
+COMMON enum multibyte_input_handling multibyte_handling;
+
/* TRUE if we should produce a listing. */
extern int listing;
@@ -450,6 +458,7 @@ void input_scrub_insert_file (char *);
char * input_scrub_new_file (const char *);
char * input_scrub_next_buffer (char **bufp);
size_t do_scrub_chars (size_t (*get) (char *, size_t), char *, size_t);
+bool scan_for_multibyte_characters (const unsigned char *, const unsigned char *, bool);
int gen_to_words (LITTLENUM_TYPE *, int, long);
int had_err (void);
int ignore_input (void);
diff --git a/gas/doc/as.texi b/gas/doc/as.texi
index 9c1924d..b83f50b 100644
--- a/gas/doc/as.texi
+++ b/gas/doc/as.texi
@@ -245,6 +245,7 @@ gcc(1), ld(1), and the Info entries for @file{binutils} and @file{ld}.
[@b{--sectname-subst}] [@b{--size-check=[error|warning]}]
[@b{--elf-stt-common=[no|yes]}]
[@b{--generate-missing-build-notes=[no|yes]}]
+ [@b{--multibyte-handling=[allow|warn|warn-sym-only]}]
[@b{--target-help}] [@var{target-options}]
[@b{--}|@var{files} @dots{}]
@c
@@ -871,6 +872,18 @@ Set the maximum width of an input source line, as displayed in a listing, to
Set the maximum number of lines printed in a listing for a single line of input
to @var{number} + 1.
+@item --multibyte-handling=allow
+@itemx --multibyte-handling=warn
+@itemx --multibyte-handling=warn-sym-only
+Controls how the assembler handles multibyte characters in the input. The
+default (which can be restored by using the @option{allow} argument) is to
+allow such characters without complaint. Using the @option{warn} argument will
+make the assembler generate a warning message whenever any multibyte character
+is encountered. Using the @option{warn-sym-only} argument will only cause a
+warning to be generated when a symbol is defined with a name that contains
+multibyte characters. (References to undefined symbols will not generate a
+warning).
+
@item --no-pad-sections
Stop the assembler from padding the ends of output sections to the alignment
of that section. The default is to pad the sections, but this can waste space
@@ -2966,9 +2979,11 @@ are noted in @ref{Machine Dependencies}.
@end ifset
No symbol may begin with a digit. Case is significant.
There is no length limit; all characters are significant. Multibyte characters
-are supported. Symbols are delimited by characters not in that set, or by the
-beginning of a file (since the source program must end with a newline, the end
-of a file is not a possible symbol delimiter). @xref{Symbols}.
+are supported, but note that the setting of the
+@option{--multibyte-handling} option might prevent their use. Symbols
+are delimited by characters not in that set, or by the beginning of a file
+(since the source program must end with a newline, the end of a file is not a
+possible symbol delimiter). @xref{Symbols}.
Symbol names may also be enclosed in double quote @code{"} characters. In such
cases any characters are allowed, except for the NUL character. If a double
@@ -3858,11 +3873,18 @@ than @code{Foo}.
Symbol names do not start with a digit. An exception to this rule is made for
Local Labels. See below.
-Multibyte characters are supported. To generate a symbol name containing
+Multibyte characters are supported, but note that the setting of the
+@option{--multibyte-handling} option might prevent their use.
+To generate a symbol name containing
multibyte characters enclose it within double quotes and use escape codes. cf
@xref{Strings}. Generating a multibyte symbol name from a label is not
currently supported.
+Since multibyte symbol names are unusual, and could possibly be used
+maliciously, @command{@value{AS}} provides a command line option
+(@option{--multibyte-handling=warn-sym-only}) which can be used to generate a
+warning message whenever a symbol name containing multibyte characters is defined.
+
Each symbol has exactly one name. Each name in an assembly language program
refers to exactly one symbol. You may use that symbol name any number of times
in a program.
diff --git a/gas/input-scrub.c b/gas/input-scrub.c
index b93afb2..c665402 100644
--- a/gas/input-scrub.c
+++ b/gas/input-scrub.c
@@ -377,6 +377,11 @@ input_scrub_next_buffer (char **bufp)
++p;
}
+ if (multibyte_handling == multibyte_warn)
+ (void) scan_for_multibyte_characters ((const unsigned char *) p,
+ (const unsigned char *) limit,
+ true /* Generate warnings */);
+
/* We found a newline in the newly read chars. */
partial_where = p;
partial_size = limit - p;
diff --git a/gas/symbols.c b/gas/symbols.c
index 3cb9425..889ec66 100644
--- a/gas/symbols.c
+++ b/gas/symbols.c
@@ -82,6 +82,10 @@ struct symbol_flags
/* Whether the symbol has been marked to be removed by a .symver
directive. */
unsigned int removed : 1;
+
+ /* Set when a warning about the symbol containing multibyte characters
+ is generated. */
+ unsigned int multibyte_warned : 1;
};
/* A pointer in the symbol may point to either a complete symbol
@@ -198,7 +202,7 @@ static void *
symbol_entry_find (htab_t table, const char *name)
{
hashval_t hash = htab_hash_string (name);
- symbol_entry_t needle = { { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ symbol_entry_t needle = { { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
hash, name, 0, 0, 0 } };
return htab_find_with_hash (table, &needle, hash);
}
@@ -309,6 +313,18 @@ symbol_init (symbolS *symbolP, const char *name, asection *sec,
symbolP->bsym->name = name;
symbolP->bsym->section = sec;
+ if (multibyte_handling == multibyte_warn_syms
+ && ! symbolP->flags.local_symbol
+ && sec != undefined_section
+ && ! symbolP->flags.multibyte_warned
+ && scan_for_multibyte_characters ((const unsigned char *) name,
+ (const unsigned char *) name + strlen (name),
+ false /* Do not warn. */))
+ {
+ as_warn (_("symbol '%s' contains multibyte characters"), name);
+ symbolP->flags.multibyte_warned = 1;
+ }
+
S_SET_VALUE (symbolP, valu);
symbol_clear_list_pointers (symbolP);
@@ -2427,7 +2443,21 @@ S_SET_SEGMENT (symbolS *s, segT seg)
abort ();
}
else
- s->bsym->section = seg;
+ {
+ if (multibyte_handling == multibyte_warn_syms
+ && ! s->flags.local_symbol
+ && seg != undefined_section
+ && ! s->flags.multibyte_warned
+ && scan_for_multibyte_characters ((const unsigned char *) s->name,
+ (const unsigned char *) s->name + strlen (s->name),
+ false))
+ {
+ as_warn (_("symbol '%s' contains multibyte characters"), s->name);
+ s->flags.multibyte_warned = 1;
+ }
+
+ s->bsym->section = seg;
+ }
}
void
diff --git a/gas/testsuite/gas/all/gas.exp b/gas/testsuite/gas/all/gas.exp
index 2c812b1..5eee4f8 100644
--- a/gas/testsuite/gas/all/gas.exp
+++ b/gas/testsuite/gas/all/gas.exp
@@ -502,3 +502,5 @@ run_dump_test "nop"
run_dump_test "asciz"
run_dump_test "pr27384"
run_dump_test "pr27381"
+run_dump_test "multibyte1"
+run_dump_test "multibyte2"
diff --git a/gas/testsuite/gas/all/multibyte.s b/gas/testsuite/gas/all/multibyte.s
new file mode 100644
index 0000000..f93ea46
--- /dev/null
+++ b/gas/testsuite/gas/all/multibyte.s
@@ -0,0 +1,8 @@
+ .text
+ .globl he‮oll‬
+he‮oll‬:
+ .nop
+
+ .globl hello
+hello:
+ .nop
diff --git a/gas/testsuite/gas/all/multibyte1.d b/gas/testsuite/gas/all/multibyte1.d
new file mode 100644
index 0000000..dcbd54d
--- /dev/null
+++ b/gas/testsuite/gas/all/multibyte1.d
@@ -0,0 +1,3 @@
+#source: multibyte.s
+#as: --multibyte-handling=warn
+#warning_output: multibyte1.l
diff --git a/gas/testsuite/gas/all/multibyte1.l b/gas/testsuite/gas/all/multibyte1.l
new file mode 100644
index 0000000..a592c17
--- /dev/null
+++ b/gas/testsuite/gas/all/multibyte1.l
@@ -0,0 +1,12 @@
+[^:]*: Assembler messages:
+[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0x80\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0xae\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0x80\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0xac\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0x80\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0xae\) encountered in .*multibyte.s
+[^:]*: Warning: multibyte character \(0xe2\) encountered in .*multibyte.s
+[^:]*: Warning: further multibyte character warnings suppressed
diff --git a/gas/testsuite/gas/all/multibyte2.d b/gas/testsuite/gas/all/multibyte2.d
new file mode 100644
index 0000000..3a268de
--- /dev/null
+++ b/gas/testsuite/gas/all/multibyte2.d
@@ -0,0 +1,3 @@
+#source: multibyte.s
+#as: --multibyte-handling=warn-sym-only
+#warning_output: multibyte2.l
diff --git a/gas/testsuite/gas/all/multibyte2.l b/gas/testsuite/gas/all/multibyte2.l
new file mode 100644
index 0000000..18d7ca5
--- /dev/null
+++ b/gas/testsuite/gas/all/multibyte2.l
@@ -0,0 +1,2 @@
+[^:]*: Assembler messages:
+[^:]*:3: Warning: symbol '.*' contains multibyte characters