about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 22
-rw-r--r-- gcc/c-lex.c 66
-rw-r--r-- gcc/cppexp.c 8
-rw-r--r-- gcc/cpphash.h 2
-rw-r--r-- gcc/cppinit.c 33
-rw-r--r-- gcc/cpplex.c 82
-rw-r--r-- gcc/cpplib.c 12
-rw-r--r-- gcc/cpplib.h 28
8 files changed, 149 insertions, 104 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 144f744..ba33bc4 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,25 @@
+2002-05-04 Neil Booth <neil@daikokuya.demon.co.uk>
+
+ * c-lex.c (lex_string): Let cpp_parse_escape handle truncation
+ and sign-extension.
+ (lex_charconst): Update for change in prototype of
+ cpp_interpret_charconst. Extend from cppchar_t to HOST_WIDE_INT
+ appropriately.
+ * cpphash.h (BITS_PER_CPPCHAR_T): New.
+ * cppinit.c (cpp_create_reader): Initialize the new precision
+ members for no change in semantics.
+ (cpp_post_options): Add sanity checks.
+ * cpplex.c (cpp_parse_escape): Handle precision, sign-extension
+ and truncation issues. Calculate in type cppchar_t.
+ (MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): Remove.
+ (cpp_interpret_charconst): Calculate in type cppchar_t. Handle
+ run-time dependent precision correctly. Return whether the
+ result is signed or not.
+ * cpplib.c (dequote_string): Use cppchar_t; update.
+ * cpplib.h (cppchar_signed_t): New.
+ (struct cpp_options): New precision members.
+ (cpp_interpret_charconst, cpp_parse_escape): Update prototypes.
+
2002-05-03 David S. Miller <davem@redhat.com>
* config/sparc/sparc-protos.h (sparc_rtx_costs): New.
diff --git a/gcc/c-lex.c b/gcc/c-lex.c
index acdcf34..0c10f30 100644
--- a/gcc/c-lex.c
+++ b/gcc/c-lex.c
@@ -1238,9 +1238,7 @@ lex_string (str, len, wide)
char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
char *q = buf;
const unsigned char *p = str, *limit = str + len;
- unsigned int c;
- unsigned width = wide ? WCHAR_TYPE_SIZE
- : TYPE_PRECISION (char_type_node);
+ cppchar_t c;
#ifdef MULTIBYTE_CHARS
/* Reset multibyte conversion state. */
@@ -1270,15 +1268,7 @@ lex_string (str, len, wide)
#endif
if (c == '\\' && !ignore_escape_flag)
- {
- unsigned int mask;
-
- if (width < HOST_BITS_PER_INT)
- mask = ((unsigned int) 1 << width) - 1;
- else
- mask = ~0;
- c = cpp_parse_escape (parse_in, &p, limit, mask);
- }
+ c = cpp_parse_escape (parse_in, &p, limit, wide);
/* Add this single character into the buffer either as a wchar_t,
a multibyte sequence, or as a single byte. */
@@ -1345,45 +1335,31 @@ static tree
lex_charconst (token)
const cpp_token *token;
{
- HOST_WIDE_INT result;
+ cppchar_t result;
tree type, value;
unsigned int chars_seen;
+ int unsignedp;
result = cpp_interpret_charconst (parse_in, token, warn_multichar,
- &chars_seen);
- if (token->type == CPP_WCHAR)
- {
- value = build_int_2 (result, 0);
- type = wchar_type_node;
- }
- else
- {
- if (result < 0)
- value = build_int_2 (result, -1);
- else
- value = build_int_2 (result, 0);
-
- /* In C, a character constant has type 'int'.
- In C++ 'char', but multi-char charconsts have type 'int'. */
- if (c_language == clk_cplusplus && chars_seen <= 1)
- type = char_type_node;
- else
- type = integer_type_node;
- }
+ &chars_seen, &unsignedp);
- /* cpp_interpret_charconst issues a warning if the constant
- overflows, but if the number fits in HOST_WIDE_INT anyway, it
- will return it un-truncated, which may cause problems down the
- line. So set the type to widest_integer_literal_type, call
- convert to truncate it to the proper type, then clear
- TREE_OVERFLOW so we don't get a second warning.
-
- FIXME: cpplib's assessment of overflow may not be accurate on a
- platform where the final type can change at (compiler's) runtime. */
+ /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
+ before possibly widening to HOST_WIDE_INT for build_int_2. */
+ if (unsignedp || (cppchar_signed_t) result >= 0)
+ value = build_int_2 (result, 0);
+ else
+ value = build_int_2 ((cppchar_signed_t) result, -1);
- TREE_TYPE (value) = widest_integer_literal_type_node;
- value = convert (type, value);
- TREE_OVERFLOW (value) = 0;
+ if (token->type == CPP_WCHAR)
+ type = wchar_type_node;
+ /* In C, a character constant has type 'int'.
+ In C++ 'char', but multi-char charconsts have type 'int'. */
+ else if ((c_language == clk_c || c_language == clk_objective_c)
+ || chars_seen > 1)
+ type = integer_type_node;
+ else
+ type = char_type_node;
+ TREE_TYPE (value) = type;
return value;
}
diff --git a/gcc/cppexp.c b/gcc/cppexp.c
index 914a207..b71b02a 100644
--- a/gcc/cppexp.c
+++ b/gcc/cppexp.c
@@ -283,10 +283,10 @@ eval_token (pfile, token)
const cpp_token *token;
{
unsigned int temp;
+ int unsignedp = 0;
struct op op;
op.op = CPP_NUMBER;
- op.unsignedp = 0;
switch (token->type)
{
@@ -294,9 +294,8 @@ eval_token (pfile, token)
return parse_number (pfile, token);
case CPP_WCHAR:
- op.unsignedp = WCHAR_UNSIGNED;
- case CPP_CHAR: /* Always unsigned. */
- op.value = cpp_interpret_charconst (pfile, token, 1, &temp);
+ case CPP_CHAR:
+ op.value = cpp_interpret_charconst (pfile, token, 1, &temp, &unsignedp);
break;
case CPP_NAME:
@@ -331,6 +330,7 @@ eval_token (pfile, token)
op.value = temp;
}
+ op.unsignedp = unsignedp;
return op;
}
diff --git a/gcc/cpphash.h b/gcc/cpphash.h
index 5ad0c6e..7baf8ff 100644
--- a/gcc/cpphash.h
+++ b/gcc/cpphash.h
@@ -29,6 +29,8 @@ struct directive; /* Deliberately incomplete. */
struct pending_option;
struct op;
+#define BITS_PER_CPPCHAR_T (CHAR_BIT * sizeof (cppchar_t))
+
/* Test if a sign is valid within a preprocessing number. */
#define VALID_SIGN(c, prevc) \
(((c) == '+' || (c) == '-') && \
diff --git a/gcc/cppinit.c b/gcc/cppinit.c
index cee7571..cb5b263 100644
--- a/gcc/cppinit.c
+++ b/gcc/cppinit.c
@@ -502,6 +502,18 @@ cpp_create_reader (lang)
CPP_OPTION (pfile, pending) =
(struct cpp_pending *) xcalloc (1, sizeof (struct cpp_pending));
+ /* CPP arithmetic done to existing rules for now. */
+#define BITS_PER_HOST_WIDEST_INT (CHAR_BIT * sizeof (HOST_WIDEST_INT))
+ CPP_OPTION (pfile, precision) = BITS_PER_HOST_WIDEST_INT;
+#ifndef MAX_CHAR_TYPE_SIZE
+#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
+#endif
+ CPP_OPTION (pfile, char_precision) = MAX_CHAR_TYPE_SIZE;
+#ifndef MAX_WCHAR_TYPE_SIZE
+#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
+#endif
+ CPP_OPTION (pfile, wchar_precision) = MAX_WCHAR_TYPE_SIZE;
+
/* It's simplest to just create this struct whether or not it will
be needed. */
pfile->deps = deps_init ();
@@ -1796,6 +1808,27 @@ cpp_post_options (pfile)
fputc ('\n', stderr);
}
+#if ENABLE_CHECKING
+ /* Sanity checks for CPP arithmetic. */
+ if (CPP_OPTION (pfile, precision) > BITS_PER_HOST_WIDEST_INT)
+ cpp_error (pfile, DL_FATAL,
+ "preprocessor arithmetic has maximum precision of %u bits; target requires %u bits",
+ BITS_PER_HOST_WIDEST_INT, CPP_OPTION (pfile, precision));
+
+ if (CPP_OPTION (pfile, char_precision) > BITS_PER_CPPCHAR_T
+ || CPP_OPTION (pfile, wchar_precision) > BITS_PER_CPPCHAR_T)
+ cpp_error (pfile, DL_FATAL,
+ "CPP cannot handle (wide) character constants over %u bits",
+ BITS_PER_CPPCHAR_T);
+
+ {
+ cppchar_t test = 0;
+ test--;
+ if (test < 1)
+ cpp_error (pfile, DL_FATAL, "cppchar_t must be an unsigned type");
+ }
+#endif
+
/* Canonicalize in_fname and out_fname. We guarantee they are not
NULL, and that the empty string represents stdin / stdout. */
if (CPP_OPTION (pfile, in_fname) == NULL
diff --git a/gcc/cpplex.c b/gcc/cpplex.c
index bc12978..0a26049 100644
--- a/gcc/cpplex.c
+++ b/gcc/cpplex.c
@@ -1710,23 +1710,33 @@ maybe_read_ucs (pfile, pstr, limit, pc)
return 0;
}
-/* Interpret an escape sequence, and return its value. PSTR points to
- the input pointer, which is just after the backslash. LIMIT is how
- much text we have. MASK is a bitmask for the precision for the
- destination type (char or wchar_t).
-
- Handles all relevant diagnostics. */
-unsigned int
-cpp_parse_escape (pfile, pstr, limit, mask)
+/* Returns the value of an escape sequence, truncated to the correct
+ target precision. PSTR points to the input pointer, which is just
+ after the backslash. LIMIT is how much text we have. WIDE is true
+ if the escape sequence is part of a wide character constant or
+ string literal. Handles all relevant diagnostics. */
+cppchar_t
+cpp_parse_escape (pfile, pstr, limit, wide)
cpp_reader *pfile;
const unsigned char **pstr;
const unsigned char *limit;
- unsigned HOST_WIDE_INT mask;
+ int wide;
{
int unknown = 0;
const unsigned char *str = *pstr;
- unsigned int c = *str++;
+ cppchar_t c, mask;
+ unsigned int width;
+
+ if (wide)
+ width = CPP_OPTION (pfile, wchar_precision);
+ else
+ width = CPP_OPTION (pfile, char_precision);
+ if (width < BITS_PER_CPPCHAR_T)
+ mask = ((cppchar_t) 1 << width) - 1;
+ else
+ mask = ~0;
+ c = *str++;
switch (c)
{
case '\\': case '\'': case '"': case '?': break;
@@ -1767,7 +1777,7 @@ cpp_parse_escape (pfile, pstr, limit, mask)
"the meaning of '\\x' is different in traditional C");
{
- unsigned int i = 0, overflow = 0;
+ cppchar_t i = 0, overflow = 0;
int digits_found = 0;
while (str < limit)
@@ -1798,8 +1808,8 @@ cpp_parse_escape (pfile, pstr, limit, mask)
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
{
- unsigned int i = c - '0';
- int count = 0;
+ size_t count = 0;
+ cppchar_t i = c - '0';
while (str < limit && ++count < 3)
{
@@ -1834,36 +1844,33 @@ cpp_parse_escape (pfile, pstr, limit, mask)
}
if (c > mask)
- cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type");
+ {
+ cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type");
+ c &= mask;
+ }
*pstr = str;
return c;
}
-#ifndef MAX_CHAR_TYPE_SIZE
-#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
-#endif
-
-#ifndef MAX_WCHAR_TYPE_SIZE
-#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
-#endif
-
/* Interpret a (possibly wide) character constant in TOKEN.
- WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN points
- to a variable that is filled in with the number of characters seen. */
-HOST_WIDE_INT
-cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
+ WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
+ points to a variable that is filled in with the number of
+ characters seen, and UNSIGNEDP to a variable that indicates whether
+ the result has unsigned type. */
+cppchar_t
+cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen, unsignedp)
cpp_reader *pfile;
const cpp_token *token;
int warn_multi;
unsigned int *pchars_seen;
+ int *unsignedp;
{
const unsigned char *str = token->val.str.text;
const unsigned char *limit = str + token->val.str.len;
unsigned int chars_seen = 0;
- unsigned int width, max_chars, c;
- unsigned HOST_WIDE_INT mask;
- HOST_WIDE_INT result = 0;
+ unsigned int width, max_chars;
+ cppchar_t c, mask, result = 0;
bool unsigned_p;
#ifdef MULTIBYTE_CHARS
@@ -1873,20 +1880,20 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
/* Width in bits. */
if (token->type == CPP_CHAR)
{
- width = MAX_CHAR_TYPE_SIZE;
+ width = CPP_OPTION (pfile, char_precision);
unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
}
else
{
- width = MAX_WCHAR_TYPE_SIZE;
+ width = CPP_OPTION (pfile, wchar_precision);
unsigned_p = WCHAR_UNSIGNED;
}
- if (width < HOST_BITS_PER_WIDE_INT)
- mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
+ if (width < BITS_PER_CPPCHAR_T)
+ mask = ((cppchar_t) 1 << width) - 1;
else
mask = ~0;
- max_chars = HOST_BITS_PER_WIDE_INT / width;
+ max_chars = BITS_PER_CPPCHAR_T / width;
while (str < limit)
{
@@ -1911,7 +1918,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
#endif
if (c == '\\')
- c = cpp_parse_escape (pfile, &str, limit, mask);
+ c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
#ifdef MAP_CHARACTER
if (ISPRINT (c))
@@ -1921,7 +1928,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
/* Merge character into result; ignore excess chars. */
if (++chars_seen <= max_chars)
{
- if (width < HOST_BITS_PER_WIDE_INT)
+ if (width < BITS_PER_CPPCHAR_T)
result = (result << width) | (c & mask);
else
result = c;
@@ -1943,7 +1950,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
{
unsigned int nbits = chars_seen * width;
- mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
+ mask = (cppchar_t) ~0 >> (BITS_PER_CPPCHAR_T - nbits);
if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0)
result &= mask;
else
@@ -1951,6 +1958,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
}
*pchars_seen = chars_seen;
+ *unsignedp = unsigned_p;
return result;
}
diff --git a/gcc/cpplib.c b/gcc/cpplib.c
index b210209..c90224c 100644
--- a/gcc/cpplib.c
+++ b/gcc/cpplib.c
@@ -726,23 +726,15 @@ dequote_string (pfile, str, len)
uchar *result = _cpp_unaligned_alloc (pfile, len + 1);
uchar *dst = result;
const uchar *limit = str + len;
- unsigned int c;
- unsigned HOST_WIDE_INT mask;
+ cppchar_t c;
- /* We need the mask to match the host's 'unsigned char', not the
- target's. */
- if (CHAR_BIT < HOST_BITS_PER_WIDE_INT)
- mask = ((unsigned HOST_WIDE_INT) 1 << CHAR_BIT) - 1;
- else
- mask = ~(unsigned HOST_WIDE_INT)0;
-
while (str < limit)
{
c = *str++;
if (c != '\\')
*dst++ = c;
else
- *dst++ = cpp_parse_escape (pfile, (const uchar **)&str, limit, mask);
+ *dst++ = cpp_parse_escape (pfile, &str, limit, 0);
}
*dst++ = '\0';
return result;
diff --git a/gcc/cpplib.h b/gcc/cpplib.h
index bbf272b..520f2a2 100644
--- a/gcc/cpplib.h
+++ b/gcc/cpplib.h
@@ -190,9 +190,12 @@ struct cpp_token
} val;
};
-/* A standalone character. It is unsigned for the same reason we use
- unsigned char - to avoid signedness issues. */
+/* A type wide enough to hold any multibyte source character.
+ cpplib's character constant interpreter uses shifts, and so
+ requires an unsigned type. */
typedef unsigned int cppchar_t;
+/* Its signed equivalent. */
+typedef int cppchar_signed_t;
/* Values for opts.dump_macros.
dump_only means inhibit output of the preprocessed text
@@ -237,6 +240,10 @@ struct cpp_options
/* -fleading_underscore sets this to "_". */
const char *user_label_prefix;
+ /* Precision for target CPP arithmetic, target characters and target
+ wide characters, respectively. */
+ size_t precision, char_precision, wchar_precision;
+
/* The language we're preprocessing. */
enum c_lang lang;
@@ -535,9 +542,9 @@ extern const unsigned char *cpp_macro_definition PARAMS ((cpp_reader *,
extern void _cpp_backup_tokens PARAMS ((cpp_reader *, unsigned int));
/* Evaluate a CPP_CHAR or CPP_WCHAR token. */
-extern HOST_WIDE_INT
+extern cppchar_t
cpp_interpret_charconst PARAMS ((cpp_reader *, const cpp_token *,
- int, unsigned int *));
+ int, unsigned int *, int *));
extern void cpp_define PARAMS ((cpp_reader *, const char *));
extern void cpp_assert PARAMS ((cpp_reader *, const char *));
@@ -600,10 +607,15 @@ extern int cpp_ideq PARAMS ((const cpp_token *,
extern void cpp_output_line PARAMS ((cpp_reader *, FILE *));
extern void cpp_output_token PARAMS ((const cpp_token *, FILE *));
extern const char *cpp_type2name PARAMS ((enum cpp_ttype));
-extern unsigned int cpp_parse_escape PARAMS ((cpp_reader *,
- const unsigned char **,
- const unsigned char *,
- unsigned HOST_WIDE_INT));
+/* Returns the value of an escape sequence, truncated to the correct
+ target precision. PSTR points to the input pointer, which is just
+ after the backslash. LIMIT is how much text we have. WIDE is true
+ if the escape sequence is part of a wide character constant or
+ string literal. Handles all relevant diagnostics. */
+extern cppchar_t cpp_parse_escape PARAMS ((cpp_reader *,
+ const unsigned char ** pstr,
+ const unsigned char *limit,
+ int wide));
/* In cpphash.c */