[gdb/c] Fix type of 2147483648 and literal truncation

[ Assuming arch i386:x86-64, sizeof (int) == 4, sizeof (long) == sizeof (long long) == 8. ] Currently we have (decimal for 0x80000000): ... (gdb) ptype 2147483648 type = unsigned int ... According to C language rules, unsigned types cannot be used for decimal constants, so the type should be long instead (reported in PR16377). Fix this by making sure the type of 2147483648 is long. The next interesting case is (decimal for 0x8000000000000000): ... (gdb) ptype 9223372036854775808 type = unsigned long ... According to the same rules, unsigned long is incorrect. Current gcc uses __int128 as type, which is allowed, but we don't have that available in gdb, so the strict response here would be erroring out with overflow. Older gcc without __int128 support, as well as clang use an unsigned type, but with a warning. Interestingly, clang uses "unsigned long long" while gcc uses "unsigned long", which seems the better choice. Given that the compilers allow this as a convience, do the same in gdb and keep type "unsigned long", and make this explicit in parser and test-case. Furthermore, make sure we error out on overflow instead of truncating in all cases. Tested on x86_64-linux with --enable-targets=all. Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=16377
author: Tom de Vries <tdevries@suse.de> 2022-06-04 13:17:33 +0200
committer: Tom de Vries <tdevries@suse.de> 2022-06-04 13:17:33 +0200
commit: 1d8c0dfae79a5183e9e3311fb867afd679bc8e84 (patch)
tree: 2415ed745f9dc5a2828564fe0e90af01b93fcadb
parent: 1b4633f812b329863a523c4d798c2b55417b5144 (diff)
download: gdb-1d8c0dfae79a5183e9e3311fb867afd679bc8e84.zip
gdb-1d8c0dfae79a5183e9e3311fb867afd679bc8e84.tar.gz
gdb-1d8c0dfae79a5183e9e3311fb867afd679bc8e84.tar.bz2
4 files changed, 130 insertions, 76 deletions
diff --git a/gdb/c-exp.y b/gdb/c-exp.y
index 72f8dd3..61a61fc 100644
--- a/gdb/c-exp.y
+++ b/gdb/c-exp.y
@@ -1929,7 +1929,6 @@ parse_number (struct parser_state *par_state,
 {
   ULONGEST n = 0;
   ULONGEST prevn = 0;
-  ULONGEST un;
 
   int i = 0;
   int c;
@@ -1945,9 +1944,6 @@ parse_number (struct parser_state *par_state,
   /* We have found a "L" or "U" (or "i") suffix.  */
   int found_suffix = 0;
 
-  ULONGEST high_bit;
-  struct type *signed_type;
-  struct type *unsigned_type;
   char *p;
 
   p = (char *) alloca (len);
@@ -2095,18 +2091,12 @@ parse_number (struct parser_state *par_state,
       if (i >= base)
 	return ERROR;		/* Invalid digit in this base */
 
-      /* Portably test for overflow (only works for nonzero values, so make
-	 a second check for zero).  FIXME: Can't we just make n and prevn
-	 unsigned and avoid this?  */
-      if (c != 'l' && c != 'u' && c != 'i' && (prevn >= n) && n != 0)
-	unsigned_p = 1;		/* Try something unsigned */
-
-      /* Portably test for unsigned overflow.
-	 FIXME: This check is wrong; for example it doesn't find overflow
-	 on 0x123456789 when LONGEST is 32 bits.  */
-      if (c != 'l' && c != 'u' && c != 'i' && n != 0)
-	{	
-	  if (unsigned_p && prevn >= n)
+      if (c != 'l' && c != 'u' && c != 'i')
+	{
+	  /* Test for overflow.  */
+	  if (prevn == 0 && n == 0)
+	    ;
+	  else if (prevn >= n)
 	    error (_("Numeric constant too large."));
 	}
       prevn = n;
@@ -2123,58 +2113,45 @@ parse_number (struct parser_state *par_state,
      or gdbarch_long_bit will be that big, sometimes not.  To deal with
      the case where it is we just always shift the value more than
      once, with fewer bits each time.  */
-
-  un = n >> 2;
-  if (long_p == 0
-      && (un >> (gdbarch_int_bit (par_state->gdbarch ()) - 2)) == 0)
-    {
-      high_bit
-	= ((ULONGEST)1) << (gdbarch_int_bit (par_state->gdbarch ()) - 1);
-
-      /* A large decimal (not hex or octal) constant (between INT_MAX
-	 and UINT_MAX) is a long or unsigned long, according to ANSI,
-	 never an unsigned int, but this code treats it as unsigned
-	 int.  This probably should be fixed.  GCC gives a warning on
-	 such constants.  */
-
-      unsigned_type = parse_type (par_state)->builtin_unsigned_int;
-      signed_type = parse_type (par_state)->builtin_int;
-    }
-  else if (long_p <= 1
-	   && (un >> (gdbarch_long_bit (par_state->gdbarch ()) - 2)) == 0)
-    {
-      high_bit
-	= ((ULONGEST)1) << (gdbarch_long_bit (par_state->gdbarch ()) - 1);
-      unsigned_type = parse_type (par_state)->builtin_unsigned_long;
-      signed_type = parse_type (par_state)->builtin_long;
-    }
+  int int_bits = gdbarch_int_bit (par_state->gdbarch ());
+  int long_bits = gdbarch_long_bit (par_state->gdbarch ());
+  int long_long_bits = gdbarch_long_long_bit (par_state->gdbarch ());
+  bool have_signed
+    /* No 'u' suffix.  */
+    = !unsigned_p;
+  bool have_unsigned
+    = ((/* 'u' suffix.  */
+	unsigned_p)
+       || (/* Not a decimal.  */
+	   base != 10)
+       || (/* Allowed as a convenience, in case decimal doesn't fit in largest
+	      signed type.  */
+	   !fits_in_type (1, n, long_long_bits, true)));
+  bool have_int
+    /* No 'l' or 'll' suffix.  */
+    = long_p == 0;
+  bool have_long
+    /* No 'll' suffix.  */
+    = long_p <= 1;
+  if (have_int && have_signed && fits_in_type (1, n, int_bits, true))
+    putithere->typed_val_int.type = parse_type (par_state)->builtin_int;
+  else if (have_int && have_unsigned && fits_in_type (1, n, int_bits, false))
+    putithere->typed_val_int.type
+      = parse_type (par_state)->builtin_unsigned_int;
+  else if (have_long && have_signed && fits_in_type (1, n, long_bits, true))
+    putithere->typed_val_int.type = parse_type (par_state)->builtin_long;
+  else if (have_long && have_unsigned && fits_in_type (1, n, long_bits, false))
+    putithere->typed_val_int.type
+      = parse_type (par_state)->builtin_unsigned_long;
+  else if (have_signed && fits_in_type (1, n, long_long_bits, true))
+    putithere->typed_val_int.type
+      = parse_type (par_state)->builtin_long_long;
+  else if (have_unsigned && fits_in_type (1, n, long_long_bits, false))
+    putithere->typed_val_int.type
+      = parse_type (par_state)->builtin_unsigned_long_long;
   else
-    {
-      int shift;
-      if (sizeof (ULONGEST) * HOST_CHAR_BIT
-	  < gdbarch_long_long_bit (par_state->gdbarch ()))
-	/* A long long does not fit in a LONGEST.  */
-	shift = (sizeof (ULONGEST) * HOST_CHAR_BIT - 1);
-      else
-	shift = (gdbarch_long_long_bit (par_state->gdbarch ()) - 1);
-      high_bit = (ULONGEST) 1 << shift;
-      unsigned_type = parse_type (par_state)->builtin_unsigned_long_long;
-      signed_type = parse_type (par_state)->builtin_long_long;
-    }
-
-   putithere->typed_val_int.val = n;
-
-   /* If the high bit of the worked out type is set then this number
-      has to be unsigned. */
-
-   if (unsigned_p || (n & high_bit))
-     {
-       putithere->typed_val_int.type = unsigned_type;
-     }
-   else
-     {
-       putithere->typed_val_int.type = signed_type;
-     }
+    error (_("Numeric constant too large."));
+  putithere->typed_val_int.val = n;
 
    if (imaginary_p)
      putithere->typed_val_int.type
diff --git a/gdb/parse.c b/gdb/parse.c
index fb308be..aa12f6f 100644
--- a/gdb/parse.c
+++ b/gdb/parse.c
@@ -611,6 +611,43 @@ parse_float (const char *p, int len,
 {
   return target_float_from_string (data, type, std::string (p, len));
 }
+
+/* Return true if the number N_SIGN * N fits in a type with TYPE_BITS and
+   TYPE_SIGNED_P.  N_SIGNED is either 1 or -1.  */
+
+bool
+fits_in_type (int n_sign, ULONGEST n, int type_bits, bool type_signed_p)
+{
+  /* Normalize -0.  */
+  if (n == 0 && n_sign == -1)
+    n_sign = 1;
+
+  if (n_sign == -1 && !type_signed_p)
+    /* Can't fit a negative number in an unsigned type.  */
+    return false;
+
+  if (type_bits > sizeof (ULONGEST) * 8)
+    return true;
+
+  ULONGEST smax = (ULONGEST)1 << (type_bits - 1);
+  if (n_sign == -1)
+    {
+      /* Negative number, signed type.  */
+      return (n <= smax);
+    }
+  else if (n_sign == 1 && type_signed_p)
+    {
+      /* Positive number, signed type.  */
+      return (n < smax);
+    }
+  else if (n_sign == 1 && !type_signed_p)
+    {
+      /* Positive number, unsigned type.  */
+      return ((n >> 1) >> (type_bits - 1)) == 0;
+    }
+  else
+    gdb_assert_not_reached ("");
+}
 
 /* This function avoids direct calls to fprintf 
    in the parser generated debug code.  */
diff --git a/gdb/parser-defs.h b/gdb/parser-defs.h
index 3be7d6c..ade3039 100644
--- a/gdb/parser-defs.h
+++ b/gdb/parser-defs.h
@@ -412,6 +412,8 @@ extern std::string copy_name (struct stoken);
 
 extern bool parse_float (const char *p, int len,
 			 const struct type *type, gdb_byte *data);
+extern bool fits_in_type (int n_sign, ULONGEST n, int type_bits,
+			  bool type_signed_p);
 
 
 /* Function used to avoid direct calls to fprintf
diff --git a/gdb/testsuite/gdb.base/parse_number.exp b/gdb/testsuite/gdb.base/parse_number.exp
index 7c259e0..638ea34 100644
--- a/gdb/testsuite/gdb.base/parse_number.exp
+++ b/gdb/testsuite/gdb.base/parse_number.exp
@@ -68,6 +68,22 @@ proc fits_in_type { n type_bits type_signedness } {
     }
 }
 
+# Return 1 if LANG is a c-like language, in the sense that it uses the same
+# parser.
+
+proc c_like { lang } {
+    set res 0
+    switch $lang {
+	c
+	- c++
+	- asm
+	- objective-c
+	- opencl
+	- minimal {set res 1}
+    }
+    return $res
+}
+
 # Parse number N for LANG, and return a list of expected type and value.
 
 proc parse_number { lang n } {
@@ -165,26 +181,48 @@ proc parse_number { lang n } {
 			($any|$re_overflow)]
 	}
     } else {
-	# This is wrong for c-like languages.  For the decimal case, we
-	# shouldn't use unsigned.
-	# See PR 16377.
+	if { [c_like $lang] } {
+	    if { $hex_p } {
+		# C Hex.
+		set have_unsigned 1
+	    } else {
+		# C Decimal.  Unsigned not allowed according.
+		if { [fits_in_type $n $long_long_bits s] } {
+		    # Fits in largest signed type.
+		    set have_unsigned 0
+		} else {
+		    # Doesn't fit in largest signed type, so ill-formed, but
+		    # allow unsigned as a convenience, as compilers do (though
+		    # with a warning).
+		    set have_unsigned 1
+		}
+	    }
+	} else {
+	    # Non-C.
+	    set have_unsigned 1
+	}
+
 	if { [fits_in_type $n $int_bits s] } {
 	    return [list int $n]
-	} elseif { [fits_in_type $n $int_bits u] } {
+	} elseif { $have_unsigned && [fits_in_type $n $int_bits u] } {
 	    return [list "unsigned int" $n]
 	} elseif { [fits_in_type $n $long_bits s] } {
 	    return [list long $n]
-	} elseif { [fits_in_type $n $long_bits u] } {
+	} elseif { $have_unsigned && [fits_in_type $n $long_bits u] } {
 	    return [list "unsigned long" $n]
 	} elseif { [fits_in_type $n $long_long_bits s] } {
 	    return [list "long long" $n]
-	} elseif { [fits_in_type $n $long_long_bits u] } {
+	} elseif { $have_unsigned && [fits_in_type $n $long_long_bits u] } {
 	    return [list "unsigned long long" $n]
 	} else {
 	    # Overflow.
-	    # Some truncated value or re_overflow, should be re_overflow.
-	    return [list "((unsigned )?(int|long)|$re_overflow)" \
-			($any|$re_overflow)]
+	    if { [c_like $lang] } {
+		return [list $re_overflow $re_overflow]
+	    } else {
+		# Some truncated value or re_overflow, should be re_overflow.
+		return [list "((unsigned )?(int|long)|$re_overflow)" \
+			    ($any|$re_overflow)]
+	    }
 	}
     }
author	Tom de Vries <tdevries@suse.de>	2022-06-04 13:17:33 +0200
committer	Tom de Vries <tdevries@suse.de>	2022-06-04 13:17:33 +0200
commit	1d8c0dfae79a5183e9e3311fb867afd679bc8e84 (patch)
tree	2415ed745f9dc5a2828564fe0e90af01b93fcadb
parent	1b4633f812b329863a523c4d798c2b55417b5144 (diff)
download	gdb-1d8c0dfae79a5183e9e3311fb867afd679bc8e84.zip gdb-1d8c0dfae79a5183e9e3311fb867afd679bc8e84.tar.gz gdb-1d8c0dfae79a5183e9e3311fb867afd679bc8e84.tar.bz2