aboutsummaryrefslogtreecommitdiff
path: root/libcpp/lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'libcpp/lex.c')
-rw-r--r--libcpp/lex.c62
1 files changed, 40 insertions, 22 deletions
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 45eaca7..2247386 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1302,7 +1302,7 @@ _cpp_lex_identifier (cpp_reader *pfile, const char *name)
/* Lex an identifier starting at BUFFER->CUR - 1. */
static cpp_hashnode *
lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
- struct normalize_state *nst)
+ struct normalize_state *nst, cpp_hashnode **spelling)
{
cpp_hashnode *result;
const uchar *cur;
@@ -1332,6 +1332,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
} while (forms_identifier_p (pfile, false, nst));
result = _cpp_interpret_identifier (pfile, base,
pfile->buffer->cur - base);
+ *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
}
else
{
@@ -1340,6 +1341,7 @@ lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
base, len, hash, HT_ALLOC));
+ *spelling = result;
}
/* Rarely, identifiers require diagnostics when lexed. */
@@ -2388,7 +2390,8 @@ _cpp_lex_direct (cpp_reader *pfile)
{
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
- &nst);
+ &nst,
+ &result->val.node.spelling);
warn_about_normalization (pfile, result, &nst);
}
@@ -2666,7 +2669,8 @@ _cpp_lex_direct (cpp_reader *pfile)
if (forms_identifier_p (pfile, true, &nst))
{
result->type = CPP_NAME;
- result->val.node.node = lex_identifier (pfile, base, true, &nst);
+ result->val.node.node = lex_identifier (pfile, base, true, &nst,
+ &result->val.node.spelling);
warn_about_normalization (pfile, result, &nst);
break;
}
@@ -2740,11 +2744,35 @@ cpp_digraph2name (enum cpp_ttype type)
return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
}
+/* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
+ The buffer must already contain the enough space to hold the
+ token's spelling. Returns a pointer to the character after the
+ last character written. */
+unsigned char *
+_cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
+{
+ size_t i;
+ const unsigned char *name = NODE_NAME (ident);
+
+ for (i = 0; i < NODE_LEN (ident); i++)
+ if (name[i] & ~0x7F)
+ {
+ i += utf8_to_ucn (buffer, name + i) - 1;
+ buffer += 10;
+ }
+ else
+ *buffer++ = name[i];
+
+ return buffer;
+}
+
/* Write the spelling of a token TOKEN to BUFFER. The buffer must
already contain the enough space to hold the token's spelling.
Returns a pointer to the character after the last character written.
FORSTRING is true if this is to be the spelling after translation
- phase 1 (this is different for UCNs).
+ phase 1 (with the original spelling of extended identifiers), false
+ if extended identifiers should always be written using UCNs (there is
+ no option for always writing them in the internal UTF-8 form).
FIXME: Would be nice if we didn't need the PFILE argument. */
unsigned char *
cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
@@ -2773,24 +2801,12 @@ cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
case SPELL_IDENT:
if (forstring)
{
- memcpy (buffer, NODE_NAME (token->val.node.node),
- NODE_LEN (token->val.node.node));
- buffer += NODE_LEN (token->val.node.node);
+ memcpy (buffer, NODE_NAME (token->val.node.spelling),
+ NODE_LEN (token->val.node.spelling));
+ buffer += NODE_LEN (token->val.node.spelling);
}
else
- {
- size_t i;
- const unsigned char * name = NODE_NAME (token->val.node.node);
-
- for (i = 0; i < NODE_LEN (token->val.node.node); i++)
- if (name[i] & ~0x7F)
- {
- i += utf8_to_ucn (buffer, name + i) - 1;
- buffer += 10;
- }
- else
- *buffer++ = NODE_NAME (token->val.node.node)[i];
- }
+ buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
break;
case SPELL_LITERAL:
@@ -2904,9 +2920,11 @@ _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
case SPELL_NONE:
return (a->type != CPP_MACRO_ARG
- || a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
+ || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
+ && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
case SPELL_IDENT:
- return a->val.node.node == b->val.node.node;
+ return (a->val.node.node == b->val.node.node
+ && a->val.node.spelling == b->val.node.spelling);
case SPELL_LITERAL:
return (a->val.str.len == b->val.str.len
&& !memcmp (a->val.str.text, b->val.str.text,