diff options
Diffstat (limited to 'libcpp/lex.c')
-rw-r--r-- | libcpp/lex.c | 62 |
1 files changed, 40 insertions, 22 deletions
diff --git a/libcpp/lex.c b/libcpp/lex.c index 45eaca7..2247386 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -1302,7 +1302,7 @@ _cpp_lex_identifier (cpp_reader *pfile, const char *name) /* Lex an identifier starting at BUFFER->CUR - 1. */ static cpp_hashnode * lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, - struct normalize_state *nst) + struct normalize_state *nst, cpp_hashnode **spelling) { cpp_hashnode *result; const uchar *cur; @@ -1332,6 +1332,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, } while (forms_identifier_p (pfile, false, nst)); result = _cpp_interpret_identifier (pfile, base, pfile->buffer->cur - base); + *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base); } else { @@ -1340,6 +1341,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn, result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC)); + *spelling = result; } /* Rarely, identifiers require diagnostics when lexed. */ @@ -2388,7 +2390,8 @@ _cpp_lex_direct (cpp_reader *pfile) { struct normalize_state nst = INITIAL_NORMALIZE_STATE; result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false, - &nst); + &nst, + &result->val.node.spelling); warn_about_normalization (pfile, result, &nst); } @@ -2666,7 +2669,8 @@ _cpp_lex_direct (cpp_reader *pfile) if (forms_identifier_p (pfile, true, &nst)) { result->type = CPP_NAME; - result->val.node.node = lex_identifier (pfile, base, true, &nst); + result->val.node.node = lex_identifier (pfile, base, true, &nst, + &result->val.node.spelling); warn_about_normalization (pfile, result, &nst); break; } @@ -2740,11 +2744,35 @@ cpp_digraph2name (enum cpp_ttype type) return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH]; } +/* Write the spelling of an identifier IDENT, using UCNs, to BUFFER. + The buffer must already contain the enough space to hold the + token's spelling. Returns a pointer to the character after the + last character written. */ +unsigned char * +_cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident) +{ + size_t i; + const unsigned char *name = NODE_NAME (ident); + + for (i = 0; i < NODE_LEN (ident); i++) + if (name[i] & ~0x7F) + { + i += utf8_to_ucn (buffer, name + i) - 1; + buffer += 10; + } + else + *buffer++ = name[i]; + + return buffer; +} + /* Write the spelling of a token TOKEN to BUFFER. The buffer must already contain the enough space to hold the token's spelling. Returns a pointer to the character after the last character written. FORSTRING is true if this is to be the spelling after translation - phase 1 (this is different for UCNs). + phase 1 (with the original spelling of extended identifiers), false + if extended identifiers should always be written using UCNs (there is + no option for always writing them in the internal UTF-8 form). FIXME: Would be nice if we didn't need the PFILE argument. */ unsigned char * cpp_spell_token (cpp_reader *pfile, const cpp_token *token, @@ -2773,24 +2801,12 @@ cpp_spell_token (cpp_reader *pfile, const cpp_token *token, case SPELL_IDENT: if (forstring) { - memcpy (buffer, NODE_NAME (token->val.node.node), - NODE_LEN (token->val.node.node)); - buffer += NODE_LEN (token->val.node.node); + memcpy (buffer, NODE_NAME (token->val.node.spelling), + NODE_LEN (token->val.node.spelling)); + buffer += NODE_LEN (token->val.node.spelling); } else - { - size_t i; - const unsigned char * name = NODE_NAME (token->val.node.node); - - for (i = 0; i < NODE_LEN (token->val.node.node); i++) - if (name[i] & ~0x7F) - { - i += utf8_to_ucn (buffer, name + i) - 1; - buffer += 10; - } - else - *buffer++ = NODE_NAME (token->val.node.node)[i]; - } + buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node); break; case SPELL_LITERAL: @@ -2904,9 +2920,11 @@ _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b) return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no); case SPELL_NONE: return (a->type != CPP_MACRO_ARG - || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no); + || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no + && a->val.macro_arg.spelling == b->val.macro_arg.spelling)); case SPELL_IDENT: - return a->val.node.node == b->val.node.node; + return (a->val.node.node == b->val.node.node + && a->val.node.spelling == b->val.node.spelling); case SPELL_LITERAL: return (a->val.str.len == b->val.str.len && !memcmp (a->val.str.text, b->val.str.text, |