diff options
Diffstat (limited to 'libcpp')
-rw-r--r-- | libcpp/ChangeLog | 32 | ||||
-rw-r--r-- | libcpp/include/cpp-id-data.h | 4 | ||||
-rw-r--r-- | libcpp/include/cpplib.h | 14 | ||||
-rw-r--r-- | libcpp/internal.h | 4 | ||||
-rw-r--r-- | libcpp/lex.c | 62 | ||||
-rw-r--r-- | libcpp/macro.c | 59 | ||||
-rw-r--r-- | libcpp/traditional.c | 3 |
7 files changed, 132 insertions, 46 deletions
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index fc577f1..c29fe87 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,35 @@ +2014-11-06 Joseph Myers <joseph@codesourcery.com> + + * include/cpp-id-data.h (struct cpp_macro): Update comment + regarding parameters. + * include/cpplib.h (struct cpp_macro_arg, struct cpp_identifier): + Add spelling fields. + (struct cpp_token): Update comment on macro_arg. + * internal.h (_cpp_save_parameter): Add extra argument. + (_cpp_spell_ident_ucns): New declaration. + * lex.c (lex_identifier): Add SPELLING argument. Set *SPELLING to + original spelling of identifier. + (_cpp_lex_direct): Update calls to lex_identifier. + (_cpp_spell_ident_ucns): New function, factored out of + cpp_spell_token. + (cpp_spell_token): Adjust FORSTRING argument semantics to return + original spelling of identifiers. Use _cpp_spell_ident_ucns in + !FORSTRING case. + (_cpp_equiv_tokens): Check spellings of identifiers and macro + arguments are identical. + * macro.c (macro_arg_saved_data): New structure. + (paste_tokens): Use original spellings of identifiers from + cpp_spell_token. + (_cpp_save_parameter): Add argument SPELLING. Save both canonical + node and its value. + (parse_params): Update calls to _cpp_save_parameter. + (lex_expansion_token): Save spelling of macro argument tokens. + (_cpp_create_definition): Extract canonical node from saved data. + (cpp_macro_definition): Use UCNs in spelling of macro name. Use + original spellings of macro argument tokens and identifiers. + * traditional.c (scan_parameters): Update call to + _cpp_save_parameter. + 2014-11-05 Joseph Myers <joseph@codesourcery.com> PR preprocessor/9449 diff --git a/libcpp/include/cpp-id-data.h b/libcpp/include/cpp-id-data.h index d4c0091..97a2f67 100644 --- a/libcpp/include/cpp-id-data.h +++ b/libcpp/include/cpp-id-data.h @@ -33,7 +33,9 @@ struct GTY(()) answer { /* Each macro definition is recorded in a cpp_macro structure. 
Variadic macros cannot occur with traditional cpp. */ struct GTY(()) cpp_macro { - /* Parameters, if any. */ + /* Parameters, if any. If parameter names use extended identifiers, + the original spelling of those identifiers, not the canonical + UTF-8 spelling, goes here. */ cpp_hashnode ** GTY ((nested_ptr (union tree_node, "%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL", "%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL"), diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 06d18d4..7f8e719 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -208,6 +208,12 @@ enum cpp_token_fld_kind { struct GTY(()) cpp_macro_arg { /* Argument number. */ unsigned int arg_no; + /* The original spelling of the macro argument token. */ + cpp_hashnode * + GTY ((nested_ptr (union tree_node, + "%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL", + "%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL"))) + spelling; }; /* An identifier in the cpp_token union. */ @@ -218,6 +224,12 @@ struct GTY(()) cpp_identifier { "%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL", "%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL"))) node; + /* The original spelling of the identifier. */ + cpp_hashnode * + GTY ((nested_ptr (union tree_node, + "%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL", + "%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL"))) + spelling; }; /* A preprocessing token. This has been carefully packed and should @@ -238,7 +250,7 @@ struct GTY(()) cpp_token { /* A string, or number. */ struct cpp_string GTY ((tag ("CPP_TOKEN_FLD_STR"))) str; - /* Argument no. for a CPP_MACRO_ARG. */ + /* Argument no. (and original spelling) for a CPP_MACRO_ARG. */ struct cpp_macro_arg GTY ((tag ("CPP_TOKEN_FLD_ARG_NO"))) macro_arg; /* Original token no. 
for a CPP_PASTE (from a sequence of diff --git a/libcpp/internal.h b/libcpp/internal.h index 602a503..427f4c6 100644 --- a/libcpp/internal.h +++ b/libcpp/internal.h @@ -616,7 +616,8 @@ extern bool _cpp_create_definition (cpp_reader *, cpp_hashnode *); extern void _cpp_pop_context (cpp_reader *); extern void _cpp_push_text_context (cpp_reader *, cpp_hashnode *, const unsigned char *, size_t); -extern bool _cpp_save_parameter (cpp_reader *, cpp_macro *, cpp_hashnode *); +extern bool _cpp_save_parameter (cpp_reader *, cpp_macro *, cpp_hashnode *, + cpp_hashnode *); extern bool _cpp_arguments_ok (cpp_reader *, cpp_macro *, const cpp_hashnode *, unsigned int); extern const unsigned char *_cpp_builtin_macro_text (cpp_reader *, @@ -665,6 +666,7 @@ extern bool _cpp_skip_block_comment (cpp_reader *); extern cpp_token *_cpp_temp_token (cpp_reader *); extern const cpp_token *_cpp_lex_token (cpp_reader *); extern cpp_token *_cpp_lex_direct (cpp_reader *); +extern unsigned char *_cpp_spell_ident_ucns (unsigned char *, cpp_hashnode *); extern int _cpp_equiv_tokens (const cpp_token *, const cpp_token *); extern void _cpp_init_tokenrun (tokenrun *, unsigned int); extern cpp_hashnode *_cpp_lex_identifier (cpp_reader *, const char *); diff --git a/libcpp/lex.c b/libcpp/lex.c index 45eaca7..2247386 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -1302,7 +1302,7 @@ _cpp_lex_identifier (cpp_reader *pfile, const char *name) /* Lex an identifier starting at BUFFER->CUR - 1. 
*/ static cpp_hashnode * lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, - struct normalize_state *nst) + struct normalize_state *nst, cpp_hashnode **spelling) { cpp_hashnode *result; const uchar *cur; @@ -1332,6 +1332,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, } while (forms_identifier_p (pfile, false, nst)); result = _cpp_interpret_identifier (pfile, base, pfile->buffer->cur - base); + *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base); } else { @@ -1340,6 +1341,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC)); + *spelling = result; } /* Rarely, identifiers require diagnostics when lexed. */ @@ -2388,7 +2390,8 @@ _cpp_lex_direct (cpp_reader *pfile) { struct normalize_state nst = INITIAL_NORMALIZE_STATE; result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false, - &nst); + &nst, + &result->val.node.spelling); warn_about_normalization (pfile, result, &nst); } @@ -2666,7 +2669,8 @@ _cpp_lex_direct (cpp_reader *pfile) if (forms_identifier_p (pfile, true, &nst)) { result->type = CPP_NAME; - result->val.node.node = lex_identifier (pfile, base, true, &nst); + result->val.node.node = lex_identifier (pfile, base, true, &nst, + &result->val.node.spelling); warn_about_normalization (pfile, result, &nst); break; } @@ -2740,11 +2744,35 @@ cpp_digraph2name (enum cpp_ttype type) return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH]; } +/* Write the spelling of an identifier IDENT, using UCNs, to BUFFER. + The buffer must already contain the enough space to hold the + token's spelling. Returns a pointer to the character after the + last character written. 
*/ +unsigned char * +_cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident) +{ + size_t i; + const unsigned char *name = NODE_NAME (ident); + + for (i = 0; i < NODE_LEN (ident); i++) + if (name[i] & ~0x7F) + { + i += utf8_to_ucn (buffer, name + i) - 1; + buffer += 10; + } + else + *buffer++ = name[i]; + + return buffer; +} + /* Write the spelling of a token TOKEN to BUFFER. The buffer must already contain the enough space to hold the token's spelling. Returns a pointer to the character after the last character written. FORSTRING is true if this is to be the spelling after translation - phase 1 (this is different for UCNs). + phase 1 (with the original spelling of extended identifiers), false + if extended identifiers should always be written using UCNs (there is + no option for always writing them in the internal UTF-8 form). FIXME: Would be nice if we didn't need the PFILE argument. */ unsigned char * cpp_spell_token (cpp_reader *pfile, const cpp_token *token, @@ -2773,24 +2801,12 @@ cpp_spell_token (cpp_reader *pfile, const cpp_token *token, case SPELL_IDENT: if (forstring) { - memcpy (buffer, NODE_NAME (token->val.node.node), - NODE_LEN (token->val.node.node)); - buffer += NODE_LEN (token->val.node.node); + memcpy (buffer, NODE_NAME (token->val.node.spelling), + NODE_LEN (token->val.node.spelling)); + buffer += NODE_LEN (token->val.node.spelling); } else - { - size_t i; - const unsigned char * name = NODE_NAME (token->val.node.node); - - for (i = 0; i < NODE_LEN (token->val.node.node); i++) - if (name[i] & ~0x7F) - { - i += utf8_to_ucn (buffer, name + i) - 1; - buffer += 10; - } - else - *buffer++ = NODE_NAME (token->val.node.node)[i]; - } + buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node); break; case SPELL_LITERAL: @@ -2904,9 +2920,11 @@ _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b) return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no); case SPELL_NONE: return (a->type != CPP_MACRO_ARG - || 
a->val.macro_arg.arg_no == b->val.macro_arg.arg_no); + || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no + && a->val.macro_arg.spelling == b->val.macro_arg.spelling)); case SPELL_IDENT: - return a->val.node.node == b->val.node.node; + return (a->val.node.node == b->val.node.node + && a->val.node.spelling == b->val.node.spelling); case SPELL_LITERAL: return (a->val.str.len == b->val.str.len && !memcmp (a->val.str.text, b->val.str.text, diff --git a/libcpp/macro.c b/libcpp/macro.c index 8fa9770..678bf2b 100644 --- a/libcpp/macro.c +++ b/libcpp/macro.c @@ -80,6 +80,15 @@ struct macro_arg_token_iter #endif }; +/* Saved data about an identifier being used as a macro argument + name. */ +struct macro_arg_saved_data { + /* The canonical (UTF-8) spelling of this identifier. */ + cpp_hashnode *canonical_node; + /* The previous value of this identifier. */ + union _cpp_hashnode_value value; +}; + /* Macro expansion. */ static int enter_macro_context (cpp_reader *, cpp_hashnode *, @@ -590,7 +599,7 @@ paste_tokens (cpp_reader *pfile, source_location location, len = cpp_token_len (*plhs) + cpp_token_len (rhs) + 1; buf = (unsigned char *) alloca (len); - end = lhsend = cpp_spell_token (pfile, *plhs, buf, false); + end = lhsend = cpp_spell_token (pfile, *plhs, buf, true); /* Avoid comment headers, since they are still processed in stage 3. It is simpler to insert a space here, rather than modifying the @@ -600,7 +609,7 @@ paste_tokens (cpp_reader *pfile, source_location location, *end++ = ' '; /* In one obscure case we might see padding here. */ if (rhs->type != CPP_PADDING) - end = cpp_spell_token (pfile, rhs, end, false); + end = cpp_spell_token (pfile, rhs, end, true); *end = '\n'; cpp_push_buffer (pfile, buf, end - buf, /* from_stage3 */ true); @@ -2748,10 +2757,12 @@ _cpp_free_definition (cpp_hashnode *h) h->flags &= ~(NODE_BUILTIN | NODE_DISABLED | NODE_USED); } -/* Save parameter NODE to the parameter list of macro MACRO. 
Returns - zero on success, nonzero if the parameter is a duplicate. */ +/* Save parameter NODE (spelling SPELLING) to the parameter list of + macro MACRO. Returns zero on success, nonzero if the parameter is + a duplicate. */ bool -_cpp_save_parameter (cpp_reader *pfile, cpp_macro *macro, cpp_hashnode *node) +_cpp_save_parameter (cpp_reader *pfile, cpp_macro *macro, cpp_hashnode *node, + cpp_hashnode *spelling) { unsigned int len; /* Constraint 6.10.3.6 - duplicate parameter names. */ @@ -2766,17 +2777,20 @@ _cpp_save_parameter (cpp_reader *pfile, cpp_macro *macro, cpp_hashnode *node) < (macro->paramc + 1) * sizeof (cpp_hashnode *)) _cpp_extend_buff (pfile, &pfile->a_buff, sizeof (cpp_hashnode *)); - ((cpp_hashnode **) BUFF_FRONT (pfile->a_buff))[macro->paramc++] = node; + ((cpp_hashnode **) BUFF_FRONT (pfile->a_buff))[macro->paramc++] = spelling; node->flags |= NODE_MACRO_ARG; - len = macro->paramc * sizeof (union _cpp_hashnode_value); + len = macro->paramc * sizeof (struct macro_arg_saved_data); if (len > pfile->macro_buffer_len) { pfile->macro_buffer = XRESIZEVEC (unsigned char, pfile->macro_buffer, len); pfile->macro_buffer_len = len; } - ((union _cpp_hashnode_value *) pfile->macro_buffer)[macro->paramc - 1] - = node->value; + struct macro_arg_saved_data save; + save.value = node->value; + save.canonical_node = node; + ((struct macro_arg_saved_data *) pfile->macro_buffer)[macro->paramc - 1] + = save; node->value.arg_index = macro->paramc; return false; @@ -2816,7 +2830,8 @@ parse_params (cpp_reader *pfile, cpp_macro *macro) } prev_ident = 1; - if (_cpp_save_parameter (pfile, macro, token->val.node.node)) + if (_cpp_save_parameter (pfile, macro, token->val.node.node, + token->val.node.spelling)) return false; continue; @@ -2839,6 +2854,7 @@ parse_params (cpp_reader *pfile, cpp_macro *macro) if (!prev_ident) { _cpp_save_parameter (pfile, macro, + pfile->spec_nodes.n__VA_ARGS__, pfile->spec_nodes.n__VA_ARGS__); pfile->state.va_args_ok = 1; if (! 
CPP_OPTION (pfile, c99) @@ -2909,8 +2925,10 @@ lex_expansion_token (cpp_reader *pfile, cpp_macro *macro) if (token->type == CPP_NAME && (token->val.node.node->flags & NODE_MACRO_ARG) != 0) { + cpp_hashnode *spelling = token->val.node.spelling; token->type = CPP_MACRO_ARG; token->val.macro_arg.arg_no = token->val.node.node->value.arg_index; + token->val.macro_arg.spelling = spelling; } else if (CPP_WTRADITIONAL (pfile) && macro->paramc > 0 && (token->type == CPP_STRING || token->type == CPP_CHAR)) @@ -3162,9 +3180,11 @@ _cpp_create_definition (cpp_reader *pfile, cpp_hashnode *node) /* Clear the fast argument lookup indices. */ for (i = macro->paramc; i-- > 0; ) { - struct cpp_hashnode *node = macro->params[i]; + struct macro_arg_saved_data *save = + &((struct macro_arg_saved_data *) pfile->macro_buffer)[i]; + struct cpp_hashnode *node = save->canonical_node; node->flags &= ~ NODE_MACRO_ARG; - node->value = ((union _cpp_hashnode_value *) pfile->macro_buffer)[i]; + node->value = save->value; } if (!ok) @@ -3285,7 +3305,7 @@ cpp_macro_definition (cpp_reader *pfile, cpp_hashnode *node) macro = node->value.macro; /* Calculate length. */ - len = NODE_LEN (node) + 2; /* ' ' and NUL. */ + len = NODE_LEN (node) * 10 + 2; /* ' ' and NUL. */ if (macro->fun_like) { len += 4; /* "()" plus possible final ".." of named @@ -3305,7 +3325,7 @@ cpp_macro_definition (cpp_reader *pfile, cpp_hashnode *node) cpp_token *token = &macro->exp.tokens[i]; if (token->type == CPP_MACRO_ARG) - len += NODE_LEN (macro->params[token->val.macro_arg.arg_no - 1]); + len += NODE_LEN (token->val.macro_arg.spelling); else len += cpp_token_len (token); @@ -3327,8 +3347,7 @@ cpp_macro_definition (cpp_reader *pfile, cpp_hashnode *node) /* Fill in the buffer. Start with the macro name. */ buffer = pfile->macro_buffer; - memcpy (buffer, NODE_NAME (node), NODE_LEN (node)); - buffer += NODE_LEN (node); + buffer = _cpp_spell_ident_ucns (buffer, node); /* Parameter names. 
*/ if (macro->fun_like) @@ -3377,12 +3396,12 @@ cpp_macro_definition (cpp_reader *pfile, cpp_hashnode *node) if (token->type == CPP_MACRO_ARG) { memcpy (buffer, - NODE_NAME (macro->params[token->val.macro_arg.arg_no - 1]), - NODE_LEN (macro->params[token->val.macro_arg.arg_no - 1])); - buffer += NODE_LEN (macro->params[token->val.macro_arg.arg_no - 1]); + NODE_NAME (token->val.macro_arg.spelling), + NODE_LEN (token->val.macro_arg.spelling)); + buffer += NODE_LEN (token->val.macro_arg.spelling); } else - buffer = cpp_spell_token (pfile, token, buffer, false); + buffer = cpp_spell_token (pfile, token, buffer, true); if (token->flags & PASTE_LEFT) { diff --git a/libcpp/traditional.c b/libcpp/traditional.c index dfb5378..3d40c2f 100644 --- a/libcpp/traditional.c +++ b/libcpp/traditional.c @@ -959,8 +959,9 @@ scan_parameters (cpp_reader *pfile, cpp_macro *macro) if (is_idstart (*cur)) { + struct cpp_hashnode *id = lex_identifier (pfile, cur); ok = false; - if (_cpp_save_parameter (pfile, macro, lex_identifier (pfile, cur))) + if (_cpp_save_parameter (pfile, macro, id, id)) break; cur = skip_whitespace (pfile, CUR (pfile->context), true /* skip_comments */); |