diff options
Diffstat (limited to 'gas')
-rw-r--r-- | gas/ChangeLog | 9 | ||||
-rw-r--r-- | gas/doc/as.texinfo | 13 | ||||
-rw-r--r-- | gas/read.c | 238 | ||||
-rw-r--r-- | gas/testsuite/ChangeLog | 7 | ||||
-rw-r--r-- | gas/testsuite/gas/elf/elf.exp | 2 | ||||
-rw-r--r-- | gas/testsuite/gas/elf/syms.d | 18 | ||||
-rw-r--r-- | gas/testsuite/gas/elf/syms.s | 5 |
7 files changed, 186 insertions, 106 deletions
diff --git a/gas/ChangeLog b/gas/ChangeLog index caef2ba..a32f87c 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,12 @@ +2012-05-28 Nick Clifton <nickc@redhat.com> + + * read.c (read_symbol_name): New function. Reads a symbol names. + Allows escape codes in names. + (s_comm_internal): Use read_symbol_name. + (s_globl, s_lsym, s_set, s_weakref): Likewise. + * doc/as.texinfo: Document support for multibyte characters in + symbol names. + 2012-05-21 Mike Frysinger <vapier@gentoo.org> * config/tc-mips.c (mips_after_parse_args): Assert that arch_info diff --git a/gas/doc/as.texinfo b/gas/doc/as.texinfo index 694c806..72b5d05 100644 --- a/gas/doc/as.texinfo +++ b/gas/doc/as.texinfo @@ -2485,10 +2485,10 @@ On most machines, you can also use @code{$} in symbol names; exceptions are noted in @ref{Machine Dependencies}. @end ifset No symbol may begin with a digit. Case is significant. -There is no length limit: all characters are significant. Symbols are -delimited by characters not in that set, or by the beginning of a file -(since the source program must end with a newline, the end of a file is -not a possible symbol delimiter). @xref{Symbols}. +There is no length limit: all characters are significant. Multibyte characters +are supported. Symbols are delimited by characters not in that set, or by the +beginning of a file (since the source program must end with a newline, the end +of a file is not a possible symbol delimiter). @xref{Symbols}. @cindex length of symbols @node Statements @@ -3414,6 +3414,11 @@ on the H8/300), and underscores. Case of letters is significant: @code{foo} is a different symbol name than @code{Foo}. +Multibyte characters are supported. To generate a symbol name containing +multibyte characters enclose it within double quotes and use escape codes. cf +@xref{Strings}. Generating a multibyte symbol name from a label is not +currently supported. + Each symbol has exactly one name. Each name in an assembly language program refers to exactly one symbol. You may use that symbol name any number of times in a program. @@ -40,6 +40,7 @@ #include "obstack.h" #include "ecoff.h" #include "dw2gencfi.h" +#include "wchar.h" #ifndef TC_START_LABEL #define TC_START_LABEL(x,y,z) (x == ':') @@ -1583,13 +1584,106 @@ s_altmacro (int on) macro_set_alternate (on); } +/* Read a symbol name from input_line_pointer. + + Stores the symbol name in a buffer and returns a pointer to this buffer. + The buffer is xalloc'ed. It is the caller's responsibility to free + this buffer. + + The name is not left in the i_l_p buffer as it may need processing + to handle escape characters. + + Advances i_l_p to the next non-whitespace character. + + If a symbol name could not be read, the routine issues an error + messages, skips to the end of the line and returns NULL. */ + +static char * +read_symbol_name (void) +{ + char * name; + char * start; + char c; + + c = *input_line_pointer++; + + if (c == '"') + { +#define SYM_NAME_CHUNK_LEN 128 + ptrdiff_t len = SYM_NAME_CHUNK_LEN; + char * name_end; + unsigned int C; + + start = name = xmalloc (len + 1); + + name_end = name + SYM_NAME_CHUNK_LEN; + + while (is_a_char (C = next_char_of_string ())) + { + if (name >= name_end) + { + ptrdiff_t sofar; + + sofar = name - start; + len += SYM_NAME_CHUNK_LEN; + start = xrealloc (start, len + 1); + name_end = start + len; + name = start + sofar; + } + + *name++ = (char) C; + } + *name = 0; + + /* Since quoted symbol names can contain non-ASCII characters, + check the string and warn if it cannot be recognised by the + current character set. */ + if (mbstowcs (NULL, name, len) == (size_t) -1) + as_warn (_("symbol name not recognised in the current locale")); + } + else if (is_name_beginner (c) || c == '\001') + { + ptrdiff_t len; + + name = input_line_pointer - 1; + + /* We accept \001 in a name in case this is + being called with a constructed string. */ + while (is_part_of_name (c = *input_line_pointer++) + || c == '\001') + ; + + len = (input_line_pointer - name) - 1; + start = xmalloc (len + 1); + + memcpy (start, name, len); + start[len] = 0; + + /* Skip a name ender char if one is present. */ + if (! is_name_ender (c)) + --input_line_pointer; + } + else + name = start = NULL; + + if (name == start) + { + as_bad (_("expected symbol name")); + ignore_rest_of_line (); + return NULL; + } + + SKIP_WHITESPACE (); + + return start; +} + + symbolS * s_comm_internal (int param, symbolS *(*comm_parse_extra) (int, symbolS *, addressT)) { char *name; - char c; - char *p; offsetT temp, size; symbolS *symbolP = NULL; char *stop = NULL; @@ -1599,20 +1693,8 @@ s_comm_internal (int param, if (flag_mri) stop = mri_comment_field (&stopc); - name = input_line_pointer; - c = get_symbol_end (); - /* Just after name is now '\0'. */ - p = input_line_pointer; - *p = c; - - if (name == p) - { - as_bad (_("expected symbol name")); - ignore_rest_of_line (); - goto out; - } - - SKIP_WHITESPACE (); + if ((name = read_symbol_name ()) == NULL) + goto out; /* Accept an optional comma after the name. The comma used to be required, but Irix 5 cc does not generate it for .lcomm. */ @@ -1635,7 +1717,6 @@ s_comm_internal (int param, goto out; } - *p = 0; symbolP = symbol_find_or_make (name); if ((S_IS_DEFINED (symbolP) || symbol_equated_p (symbolP)) && !S_IS_COMMON (symbolP)) @@ -1644,7 +1725,6 @@ s_comm_internal (int param, { symbolP = NULL; as_bad (_("symbol `%s' is already defined"), name); - *p = c; ignore_rest_of_line (); goto out; } @@ -1662,7 +1742,6 @@ s_comm_internal (int param, as_warn (_("size of \"%s\" is already %ld; not changing to %ld"), name, (long) size, (long) temp); - *p = c; if (comm_parse_extra != NULL) symbolP = (*comm_parse_extra) (param, symbolP, size); else @@ -1676,6 +1755,8 @@ s_comm_internal (int param, out: if (flag_mri) mri_comment_end (stop, stopc); + if (name != NULL) + free (name); return symbolP; } @@ -2179,12 +2260,12 @@ s_globl (int ignore ATTRIBUTE_UNUSED) do { - name = input_line_pointer; - c = get_symbol_end (); + if ((name = read_symbol_name ()) == NULL) + return; + symbolP = symbol_find_or_make (name); S_SET_EXTERNAL (symbolP); - *input_line_pointer = c; SKIP_WHITESPACE (); c = *input_line_pointer; if (c == ',') @@ -2194,6 +2275,8 @@ s_globl (int ignore ATTRIBUTE_UNUSED) if (is_end_of_line[(unsigned char) *input_line_pointer]) c = '\n'; } + + free (name); } while (c == ','); @@ -2580,33 +2663,17 @@ void s_lsym (int ignore ATTRIBUTE_UNUSED) { char *name; - char c; - char *p; expressionS exp; symbolS *symbolP; /* We permit ANY defined expression: BSD4.2 demands constants. */ - name = input_line_pointer; - c = get_symbol_end (); - p = input_line_pointer; - *p = c; - - if (name == p) - { - as_bad (_("expected symbol name")); - ignore_rest_of_line (); - return; - } - - SKIP_WHITESPACE (); + if ((name = read_symbol_name ()) == NULL) + return; if (*input_line_pointer != ',') { - *p = 0; as_bad (_("expected comma after \"%s\""), name); - *p = c; - ignore_rest_of_line (); - return; + goto err_out; } input_line_pointer++; @@ -2616,11 +2683,9 @@ s_lsym (int ignore ATTRIBUTE_UNUSED) && exp.X_op != O_register) { as_bad (_("bad expression")); - ignore_rest_of_line (); - return; + goto err_out; } - *p = 0; symbolP = symbol_find_or_make (name); if (S_GET_SEGMENT (symbolP) == undefined_section) @@ -2638,8 +2703,14 @@ s_lsym (int ignore ATTRIBUTE_UNUSED) as_bad (_("symbol `%s' is already defined"), name); } - *p = c; demand_empty_rest_of_line (); + free (name); + return; + + err_out: + ignore_rest_of_line (); + free (name); + return; } /* Read a line into an sb. Returns the character that ended the line @@ -3283,42 +3354,25 @@ void s_set (int equiv) { char *name; - char delim; - char *end_name; /* Especial apologies for the random logic: this just grew, and could be parsed much more simply! Dean in haste. */ - name = input_line_pointer; - delim = get_symbol_end (); - end_name = input_line_pointer; - *end_name = delim; - - if (name == end_name) - { - as_bad (_("expected symbol name")); - ignore_rest_of_line (); - return; - } - - SKIP_WHITESPACE (); + if ((name = read_symbol_name ()) == NULL) + return; if (*input_line_pointer != ',') { - *end_name = 0; as_bad (_("expected comma after \"%s\""), name); - *end_name = delim; ignore_rest_of_line (); + free (name); return; } input_line_pointer++; - *end_name = 0; - assign_symbol (name, equiv); - *end_name = delim; - demand_empty_rest_of_line (); + free (name); } void @@ -3622,23 +3676,12 @@ void s_weakref (int ignore ATTRIBUTE_UNUSED) { char *name; - char delim; - char *end_name; symbolS *symbolP; symbolS *symbolP2; expressionS exp; - name = input_line_pointer; - delim = get_symbol_end (); - end_name = input_line_pointer; - - if (name == end_name) - { - as_bad (_("expected symbol name")); - *end_name = delim; - ignore_rest_of_line (); - return; - } + if ((name = read_symbol_name ()) == NULL) + return; symbolP = symbol_find_or_make (name); @@ -3647,41 +3690,27 @@ s_weakref (int ignore ATTRIBUTE_UNUSED) if (!S_IS_VOLATILE (symbolP)) { as_bad (_("symbol `%s' is already defined"), name); - *end_name = delim; - ignore_rest_of_line (); - return; + goto err_out; } symbolP = symbol_clone (symbolP, 1); S_CLEAR_VOLATILE (symbolP); } - *end_name = delim; - SKIP_WHITESPACE (); if (*input_line_pointer != ',') { - *end_name = 0; as_bad (_("expected comma after \"%s\""), name); - *end_name = delim; - ignore_rest_of_line (); - return; + goto err_out; } input_line_pointer++; SKIP_WHITESPACE (); + free (name); - name = input_line_pointer; - delim = get_symbol_end (); - end_name = input_line_pointer; - - if (name == end_name) - { - as_bad (_("expected symbol name")); - ignore_rest_of_line (); - return; - } + if ((name = read_symbol_name ()) == NULL) + return; if ((symbolP2 = symbol_find_noref (name, 1)) == NULL && (symbolP2 = md_undefined_symbol (name)) == NULL) @@ -3712,6 +3741,7 @@ s_weakref (int ignore ATTRIBUTE_UNUSED) while (symp != symbolP) { char *old_loop = loop; + symp = symbol_get_value_expression (symp)->X_add_symbol; loop = concat (loop, " => ", S_GET_NAME (symp), (const char *) NULL); @@ -3722,8 +3752,7 @@ s_weakref (int ignore ATTRIBUTE_UNUSED) S_GET_NAME (symbolP), loop); free (loop); - - *end_name = delim; + free (name); ignore_rest_of_line (); return; } @@ -3734,8 +3763,6 @@ s_weakref (int ignore ATTRIBUTE_UNUSED) /* symbolP2 = symp; */ } - *end_name = delim; - memset (&exp, 0, sizeof (exp)); exp.X_op = O_symbol; exp.X_add_symbol = symbolP2; @@ -3746,6 +3773,13 @@ s_weakref (int ignore ATTRIBUTE_UNUSED) S_SET_WEAKREFR (symbolP); demand_empty_rest_of_line (); + free (name); + return; + + err_out: + ignore_rest_of_line (); + free (name); + return; } diff --git a/gas/testsuite/ChangeLog b/gas/testsuite/ChangeLog index 13a6344..e19086a 100644 --- a/gas/testsuite/ChangeLog +++ b/gas/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2012-05-28 Nick Clifton <nickc@redhat.com> + + * gas/elf/syms.s: New test - checks the generation of multibyte + symbol names. + * gas/elf/syms.d: New file - expected readelf output. + * gas/elf/elf.exp: Add syms. + 2012-05-25 Alan Modra <amodra@gmail.com> * gas/lns/lns-big-delta.s: Add nops. diff --git a/gas/testsuite/gas/elf/elf.exp b/gas/testsuite/gas/elf/elf.exp index 736eec9..b437730 100644 --- a/gas/testsuite/gas/elf/elf.exp +++ b/gas/testsuite/gas/elf/elf.exp @@ -184,6 +184,8 @@ if { [is_elf_format] } then { run_dump_test "bad-size" run_dump_test "bad-group" + run_dump_test "syms" + load_lib gas-dg.exp dg-init dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/err-*.s $srcdir/$subdir/warn-*.s]] "" "" diff --git a/gas/testsuite/gas/elf/syms.d b/gas/testsuite/gas/elf/syms.d new file mode 100644 index 0000000..40f7706 --- /dev/null +++ b/gas/testsuite/gas/elf/syms.d @@ -0,0 +1,18 @@ +#readelf: -S -s -p .strtab +#name: Multibyte symbol names +# The following targets use an unusual .set syntax... +#not-target: alpha*-*-* h8300-*-* + +#... +Section Headers: +#... + \[ .\] sec.*tion.* +#... +Symbol table.* +#... + ..: .*sy.*mbol +#... +String dump.* +#... + \[......\] sy.*mbol +#pass diff --git a/gas/testsuite/gas/elf/syms.s b/gas/testsuite/gas/elf/syms.s new file mode 100644 index 0000000..977c6bb --- /dev/null +++ b/gas/testsuite/gas/elf/syms.s @@ -0,0 +1,5 @@ + .section "sec\xa5\xc2tion" + + .set "sy\xa5\xc2mbol", . + + .string8 "str\xa5\xc2ing" |