aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Sebor <msebor@redhat.com>2020-07-20 12:06:18 -0600
committerMartin Sebor <msebor@redhat.com>2020-07-20 12:08:58 -0600
commitd5803b9876b3d11c93d1a10fabb3fbb1c4a14bd6 (patch)
tree21c9c55bfd9003436d22c960d3578579af9dd744
parent3e99ed65cbedf7a6c0abb9cd63c191326995fd34 (diff)
downloadgcc-d5803b9876b3d11c93d1a10fabb3fbb1c4a14bd6.zip
gcc-d5803b9876b3d11c93d1a10fabb3fbb1c4a14bd6.tar.gz
gcc-d5803b9876b3d11c93d1a10fabb3fbb1c4a14bd6.tar.bz2
Correct handling of constant representations containing embedded nuls.
Resolves: PR middle-end/95189 - memcmp being wrongly stripped like strcmp PR middle-end/95886 - suboptimal memcpy with embedded zero bytes gcc/ChangeLog: PR middle-end/95189 PR middle-end/95886 * builtins.c (inline_expand_builtin_string_cmp): Rename... (inline_expand_builtin_bytecmp): ...to this. (builtin_memcpy_read_str): Don't expect data to be nul-terminated. (expand_builtin_memory_copy_args): Handle object representations with embedded nul bytes. (expand_builtin_memcmp): Same. (expand_builtin_strcmp): Adjust call to naming change. (expand_builtin_strncmp): Same. * expr.c (string_constant): Create empty strings with nonzero size. * fold-const.c (c_getstr): Rename locals and update comments. * tree.c (build_string): Accept null pointer argument. (build_string_literal): Same. * tree.h (build_string): Provide a default. (build_string_literal): Same. gcc/testsuite/ChangeLog: PR middle-end/95189 PR middle-end/95886 * gcc.dg/memcmp-pr95189.c: New test. * gcc.dg/strncmp-3.c: New test. * gcc.target/i386/memcpy-pr95886.c: New test.
-rw-r--r--gcc/builtins.c153
-rw-r--r--gcc/expr.c4
-rw-r--r--gcc/fold-const.c73
-rw-r--r--gcc/testsuite/gcc.dg/memcmp-pr95189.c28
-rw-r--r--gcc/testsuite/gcc.dg/strncmp-3.c57
-rw-r--r--gcc/testsuite/gcc.target/i386/memcpy-pr95886.c107
-rw-r--r--gcc/tree.c28
-rw-r--r--gcc/tree.h5
8 files changed, 345 insertions, 110 deletions
diff --git a/gcc/builtins.c b/gcc/builtins.c
index eb66211..228db78 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -122,7 +122,7 @@ static rtx expand_builtin_next_arg (void);
static rtx expand_builtin_va_start (tree);
static rtx expand_builtin_va_end (tree);
static rtx expand_builtin_va_copy (tree);
-static rtx inline_expand_builtin_string_cmp (tree, rtx);
+static rtx inline_expand_builtin_bytecmp (tree, rtx);
static rtx expand_builtin_strcmp (tree, rtx);
static rtx expand_builtin_strncmp (tree, rtx, machine_mode);
static rtx builtin_memcpy_read_str (void *, HOST_WIDE_INT, scalar_int_mode);
@@ -3230,20 +3230,18 @@ expand_builtin_strnlen (tree exp, rtx target, machine_mode target_mode)
}
/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE)
- bytes from constant string DATA + OFFSET and return it as target
- constant. */
+ bytes from bytes at DATA + OFFSET and return it reinterpreted as
+ a target constant. */
static rtx
builtin_memcpy_read_str (void *data, HOST_WIDE_INT offset,
scalar_int_mode mode)
{
- const char *str = (const char *) data;
+ /* The REPresentation pointed to by DATA need not be a nul-terminated
+ string but the caller guarantees it's large enough for MODE. */
+ const char *rep = (const char *) data;
- gcc_assert (offset >= 0
- && ((unsigned HOST_WIDE_INT) offset + GET_MODE_SIZE (mode)
- <= strlen (str) + 1));
-
- return c_readstr (str + offset, mode);
+ return c_readstr (rep + offset, mode, /*nul_terminated=*/false);
}
/* LEN specify length of the block of memcpy/memset operation.
@@ -4414,7 +4412,6 @@ expand_builtin_memory_copy_args (tree dest, tree src, tree len,
rtx target, tree exp, memop_ret retmode,
bool might_overlap)
{
- const char *src_str;
unsigned int src_align = get_pointer_alignment (src);
unsigned int dest_align = get_pointer_alignment (dest);
rtx dest_mem, src_mem, dest_addr, len_rtx;
@@ -4446,24 +4443,29 @@ expand_builtin_memory_copy_args (tree dest, tree src, tree len,
len_rtx = expand_normal (len);
determine_block_size (len, len_rtx, &min_size, &max_size,
&probable_max_size);
- src_str = c_getstr (src);
-
- /* If SRC is a string constant and block move would be done by
- pieces, we can avoid loading the string from memory and only
- stored the computed constants. This works in the overlap
- (memmove) case as well because store_by_pieces just generates a
- series of stores of constants from the string constant returned
- by c_getstr(). */
- if (src_str
+
+ /* Try to get the byte representation of the constant SRC points to,
+ with its byte size in NBYTES. */
+ unsigned HOST_WIDE_INT nbytes;
+ const char *rep = c_getstr (src, &nbytes);
+
+ /* If the function's constant bound LEN_RTX is less than or equal
+ to the byte size of the representation of the constant argument,
+ and if block move would be done by pieces, we can avoid loading
+ the bytes from memory and only store the computed constant.
+ This works in the overlap (memmove) case as well because
+ store_by_pieces just generates a series of stores of constants
+ from the representation returned by c_getstr(). */
+ if (rep
&& CONST_INT_P (len_rtx)
- && (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1
+ && (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= nbytes
&& can_store_by_pieces (INTVAL (len_rtx), builtin_memcpy_read_str,
- CONST_CAST (char *, src_str),
+ CONST_CAST (char *, rep),
dest_align, false))
{
dest_mem = store_by_pieces (dest_mem, INTVAL (len_rtx),
builtin_memcpy_read_str,
- CONST_CAST (char *, src_str),
+ CONST_CAST (char *, rep),
dest_align, false, retmode);
dest_mem = force_operand (XEXP (dest_mem, 0), target);
dest_mem = convert_memory_address (ptr_mode, dest_mem);
@@ -4487,7 +4489,8 @@ expand_builtin_memory_copy_args (tree dest, tree src, tree len,
dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx, method,
expected_align, expected_size,
min_size, max_size, probable_max_size,
- use_mempcpy_call, &is_move_done, might_overlap);
+ use_mempcpy_call, &is_move_done,
+ might_overlap);
/* Bail out when a mempcpy call would be expanded as libcall and when
we have a target that provides a fast implementation
@@ -5322,7 +5325,7 @@ expand_builtin_memcmp (tree exp, rtx target, bool result_eq)
if (!result_eq && fcode != BUILT_IN_BCMP)
{
- result = inline_expand_builtin_string_cmp (exp, target);
+ result = inline_expand_builtin_bytecmp (exp, target);
if (result)
return result;
}
@@ -5350,26 +5353,32 @@ expand_builtin_memcmp (tree exp, rtx target, bool result_eq)
by_pieces_constfn constfn = NULL;
- const char *src_str = c_getstr (arg2);
- if (result_eq && src_str == NULL)
+ /* Try to get the byte representation of the constant ARG2 (or, only
+ when the function's result is used for equality to zero, ARG1)
+ points to, with its byte size in NBYTES. */
+ unsigned HOST_WIDE_INT nbytes;
+ const char *rep = c_getstr (arg2, &nbytes);
+ if (result_eq && rep == NULL)
{
- src_str = c_getstr (arg1);
- if (src_str != NULL)
+ /* For equality to zero the arguments are interchangeable. */
+ rep = c_getstr (arg1, &nbytes);
+ if (rep != NULL)
std::swap (arg1_rtx, arg2_rtx);
}
- /* If SRC is a string constant and block move would be done
- by pieces, we can avoid loading the string from memory
- and only stored the computed constants. */
- if (src_str
+ /* If the function's constant bound LEN_RTX is less than or equal
+ to the byte size of the representation of the constant argument,
+ and if block move would be done by pieces, we can avoid loading
+ the bytes from memory and only store the computed constant result. */
+ if (rep
&& CONST_INT_P (len_rtx)
- && (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1)
+ && (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= nbytes)
constfn = builtin_memcpy_read_str;
result = emit_block_cmp_hints (arg1_rtx, arg2_rtx, len_rtx,
TREE_TYPE (len), target,
result_eq, constfn,
- CONST_CAST (char *, src_str));
+ CONST_CAST (char *, rep));
if (result)
{
@@ -5408,7 +5417,7 @@ expand_builtin_strcmp (tree exp, ATTRIBUTE_UNUSED rtx target)
/* Due to the performance benefit, always inline the calls first. */
rtx result = NULL_RTX;
- result = inline_expand_builtin_string_cmp (exp, target);
+ result = inline_expand_builtin_bytecmp (exp, target);
if (result)
return result;
@@ -5532,7 +5541,7 @@ expand_builtin_strncmp (tree exp, ATTRIBUTE_UNUSED rtx target,
/* Due to the performance benefit, always inline the calls first. */
rtx result = NULL_RTX;
- result = inline_expand_builtin_string_cmp (exp, target);
+ result = inline_expand_builtin_bytecmp (exp, target);
if (result)
return result;
@@ -7765,18 +7774,18 @@ inline_string_cmp (rtx target, tree var_str, const char *const_str,
return result;
}
-/* Inline expansion a call to str(n)cmp, with result going to
- TARGET if that's convenient.
+/* Inline expansion of a call to str(n)cmp and memcmp, with result going
+ to TARGET if that's convenient.
If the call is not been inlined, return NULL_RTX. */
+
static rtx
-inline_expand_builtin_string_cmp (tree exp, rtx target)
+inline_expand_builtin_bytecmp (tree exp, rtx target)
{
tree fndecl = get_callee_fndecl (exp);
enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
- unsigned HOST_WIDE_INT length = 0;
bool is_ncmp = (fcode == BUILT_IN_STRNCMP || fcode == BUILT_IN_MEMCMP);
- /* Do NOT apply this inlining expansion when optimizing for size or
+ /* Do NOT apply this inlining expansion when optimizing for size or
optimization level below 2. */
if (optimize < 2 || optimize_insn_for_size_p ())
return NULL_RTX;
@@ -7799,29 +7808,47 @@ inline_expand_builtin_string_cmp (tree exp, rtx target)
unsigned HOST_WIDE_INT len2 = 0;
unsigned HOST_WIDE_INT len3 = 0;
- const char *src_str1 = c_getstr (arg1, &len1);
- const char *src_str2 = c_getstr (arg2, &len2);
+ /* Get the object representation of the initializers of ARG1 and ARG2
+ as strings, provided they refer to constant objects, with their byte
+ sizes in LEN1 and LEN2, respectively. */
+ const char *bytes1 = c_getstr (arg1, &len1);
+ const char *bytes2 = c_getstr (arg2, &len2);
- /* If neither strings is constant string, the call is not qualify. */
- if (!src_str1 && !src_str2)
+ /* Fail if neither argument refers to an initialized constant. */
+ if (!bytes1 && !bytes2)
return NULL_RTX;
- /* For strncmp, if the length is not a const, not qualify. */
if (is_ncmp)
{
+ /* Fail if the memcmp/strncmp bound is not a constant. */
if (!tree_fits_uhwi_p (len3_tree))
return NULL_RTX;
- else
- len3 = tree_to_uhwi (len3_tree);
- }
- if (src_str1 != NULL)
- len1 = strnlen (src_str1, len1) + 1;
+ len3 = tree_to_uhwi (len3_tree);
- if (src_str2 != NULL)
- len2 = strnlen (src_str2, len2) + 1;
+ if (fcode == BUILT_IN_MEMCMP)
+ {
+ /* Fail if the memcmp bound is greater than the size of either
+ of the two constant objects. */
+ if ((bytes1 && len1 < len3)
+ || (bytes2 && len2 < len3))
+ return NULL_RTX;
+ }
+ }
- int const_str_n = 0;
+ if (fcode != BUILT_IN_MEMCMP)
+ {
+ /* For string functions (i.e., strcmp and strncmp) reduce LEN1
+ and LEN2 to the length of the nul-terminated string stored
+ in each. */
+ if (bytes1 != NULL)
+ len1 = strnlen (bytes1, len1) + 1;
+ if (bytes2 != NULL)
+ len2 = strnlen (bytes2, len2) + 1;
+ }
+
+ /* See inline_string_cmp. */
+ int const_str_n;
if (!len1)
const_str_n = 2;
else if (!len2)
@@ -7831,23 +7858,23 @@ inline_expand_builtin_string_cmp (tree exp, rtx target)
else
const_str_n = 2;
- gcc_checking_assert (const_str_n > 0);
- length = (const_str_n == 1) ? len1 : len2;
-
- if (is_ncmp && len3 < length)
- length = len3;
+ /* For strncmp only, compute the new bound as the smallest of
+ the lengths of the two strings (plus 1) and the bound provided
+ to the function. */
+ unsigned HOST_WIDE_INT bound = (const_str_n == 1) ? len1 : len2;
+ if (is_ncmp && len3 < bound)
+ bound = len3;
- /* If the length of the comparision is larger than the threshold,
+ /* If the bound of the comparison is larger than the threshold,
do nothing. */
- if (length > (unsigned HOST_WIDE_INT)
- param_builtin_string_cmp_inline_length)
+ if (bound > (unsigned HOST_WIDE_INT) param_builtin_string_cmp_inline_length)
return NULL_RTX;
machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
/* Now, start inline expansion the call. */
return inline_string_cmp (target, (const_str_n == 1) ? arg2 : arg1,
- (const_str_n == 1) ? src_str1 : src_str2, length,
+ (const_str_n == 1) ? bytes1 : bytes2, bound,
const_str_n, mode);
}
diff --git a/gcc/expr.c b/gcc/expr.c
index edc5571..b4bbeff 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -11829,12 +11829,12 @@ string_constant (tree arg, tree *ptr_offset, tree *mem_size, tree *decl)
while (TREE_CODE (chartype) == ARRAY_TYPE)
chartype = TREE_TYPE (chartype);
/* Convert a char array to an empty STRING_CST having an array
- of the expected type. */
+ of the expected type and size. */
if (!initsize)
initsize = integer_zero_node;
unsigned HOST_WIDE_INT size = tree_to_uhwi (initsize);
- init = build_string_literal (size ? 1 : 0, "", chartype, size);
+ init = build_string_literal (size, NULL, chartype, size);
init = TREE_OPERAND (init, 0);
init = TREE_OPERAND (init, 0);
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index cfae846..300d959 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -15487,24 +15487,29 @@ fold_build_pointer_plus_hwi_loc (location_t loc, tree ptr, HOST_WIDE_INT off)
ptr, size_int (off));
}
-/* Return a pointer P to a NUL-terminated string representing the sequence
- of constant characters referred to by SRC (or a subsequence of such
- characters within it if SRC is a reference to a string plus some
- constant offset). If STRLEN is non-null, store the number of bytes
- in the string constant including the terminating NUL char. *STRLEN is
- typically strlen(P) + 1 in the absence of embedded NUL characters. */
+/* Return a pointer P to a NUL-terminated string containing the sequence
+ of bytes corresponding to the representation of the object referred to
+ by SRC (or a subsequence of such bytes within it if SRC is a reference
+ to an initialized constant array plus some constant offset).
+ If STRSIZE is non-null, store the number of bytes in the constant
+ sequence including the terminating NUL byte. *STRSIZE is equal to
+ sizeof(A) - OFFSET where A is the array that stores the constant
+ sequence that SRC points to and OFFSET is the byte offset of SRC from
+ the beginning of A. SRC need not point to a string or even an array
+ of characters but may point to an object of any type. */
const char *
-c_getstr (tree src, unsigned HOST_WIDE_INT *strlen /* = NULL */)
+c_getstr (tree src, unsigned HOST_WIDE_INT *strsize /* = NULL */)
{
+ /* The offset into the array A storing the string, and A's byte size. */
tree offset_node;
tree mem_size;
- if (strlen)
- *strlen = 0;
+ if (strsize)
+ *strsize = 0;
src = string_constant (src, &offset_node, &mem_size, NULL);
- if (src == 0)
+ if (!src)
return NULL;
unsigned HOST_WIDE_INT offset = 0;
@@ -15519,34 +15524,44 @@ c_getstr (tree src, unsigned HOST_WIDE_INT *strlen /* = NULL */)
if (!tree_fits_uhwi_p (mem_size))
return NULL;
- /* STRING_LENGTH is the size of the string literal, including any
- embedded NULs. STRING_SIZE is the size of the array the string
- literal is stored in. */
- unsigned HOST_WIDE_INT string_length = TREE_STRING_LENGTH (src);
- unsigned HOST_WIDE_INT string_size = tree_to_uhwi (mem_size);
+ /* ARRAY_SIZE is the byte size of the array the constant sequence
+ is stored in and equal to sizeof A. INIT_BYTES is the number
+ of bytes in the constant sequence used to initialize the array,
+ including any embedded NULs as well as the terminating NUL (for
+ strings), but not including any trailing zeros/NULs past
+ the terminating one appended implicitly to a string literal to
+ zero out the remainder of the array it's stored in. For example,
+ given:
+ const char a[7] = "abc\0d";
+ n = strlen (a + 1);
+ ARRAY_SIZE is 7, INIT_BYTES is 6, and OFFSET is 1. For a valid
+ (i.e., nul-terminated) string with no embedded nuls, INIT_BYTES
+ is equal to strlen (A) + 1. */
+ const unsigned HOST_WIDE_INT array_size = tree_to_uhwi (mem_size);
+ unsigned HOST_WIDE_INT init_bytes = TREE_STRING_LENGTH (src);
/* Ideally this would turn into a gcc_checking_assert over time. */
- if (string_length > string_size)
- string_length = string_size;
+ if (init_bytes > array_size)
+ init_bytes = array_size;
const char *string = TREE_STRING_POINTER (src);
/* Ideally this would turn into a gcc_checking_assert over time. */
- if (string_length > string_size)
- string_length = string_size;
+ if (init_bytes > array_size)
+ init_bytes = array_size;
- if (string_length == 0
- || offset >= string_size)
+ if (init_bytes == 0 || offset >= array_size)
return NULL;
- if (strlen)
+ if (strsize)
{
- /* Compute and store the length of the substring at OFFSET.
- All offsets past the initial length refer to null strings. */
- if (offset < string_length)
- *strlen = string_length - offset;
+ /* Compute and store the number of characters from the beginning
+ of the substring at OFFSET to the end, including the terminating
+ nul. Offsets past the initial length refer to null strings. */
+ if (offset < init_bytes)
+ *strsize = init_bytes - offset;
else
- *strlen = 1;
+ *strsize = 1;
}
else
{
@@ -15554,11 +15569,11 @@ c_getstr (tree src, unsigned HOST_WIDE_INT *strlen /* = NULL */)
/* Support only properly NUL-terminated single byte strings. */
if (tree_to_uhwi (TYPE_SIZE_UNIT (eltype)) != 1)
return NULL;
- if (string[string_length - 1] != '\0')
+ if (string[init_bytes - 1] != '\0')
return NULL;
}
- return offset < string_length ? string + offset : "";
+ return offset < init_bytes ? string + offset : "";
}
/* Given a tree T, compute which bits in T may be nonzero. */
diff --git a/gcc/testsuite/gcc.dg/memcmp-pr95189.c b/gcc/testsuite/gcc.dg/memcmp-pr95189.c
new file mode 100644
index 0000000..d8250ec
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/memcmp-pr95189.c
@@ -0,0 +1,28 @@
+/* PR middle-end/95189 - memcmp being wrongly stripped like strcmp
+ { dg-do run }
+ { dg-options "-O2 -Wall" } */
+
+char a4[] = "\0abc";
+char a8[] = "\0abcdefg";
+char a16[] = "\0abcdefghijklmno";
+
+int cmp4 (void)
+{
+ return __builtin_memcmp (a4, "\0\0\0\0", 4);
+}
+
+int cmp8 (void)
+{
+ return __builtin_memcmp (a8, "\0\0\0\0\0\0\0\0", 8);
+}
+
+int cmp16 (void)
+{
+ return __builtin_memcmp (a16, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16);
+}
+
+int main (void)
+{
+ if (cmp4 () < 1 || cmp8 () < 1 || cmp16 () < 1)
+ __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/strncmp-3.c b/gcc/testsuite/gcc.dg/strncmp-3.c
new file mode 100644
index 0000000..0e8101c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strncmp-3.c
@@ -0,0 +1,57 @@
+/* PR middle-end/95189 - memcmp being wrongly stripped like strcmp
+ { dg-do run }
+ { dg-options "-O2 -Wall" } */
+
+#define AB_D "ab\0d"
+#define ABCDEF_H "abcdef\0h"
+#define ABCDEFGHIJKLMN_P "abcdefghijklmn\0p"
+
+char ab_d[] = AB_D;
+char abcdef_h[] = ABCDEF_H;
+
+extern int strncmp (const char*, const char*, __SIZE_TYPE__);
+
+__attribute__((noipa)) void sink (const void *p, ...) { (void)&p; }
+
+#define strncmp(a, b, n) (sink (a, b), strncmp (a, b, n))
+
+int main (void)
+{
+ int zero = 0;
+
+ zero += strncmp (ab_d, AB_D, 1);
+ zero += strncmp (ab_d, AB_D, 2);
+ zero += strncmp (ab_d, AB_D, 3);
+ zero += strncmp (ab_d, AB_D, 4);
+ zero += strncmp (ab_d, AB_D, 5);
+
+ zero += strncmp (ab_d, ABCDEF_H, 1);
+ zero += strncmp (ab_d, ABCDEF_H, 2);
+
+ zero += strncmp (abcdef_h, AB_D, 2);
+
+ zero += strncmp (abcdef_h, ABCDEF_H, 2);
+ zero += strncmp (abcdef_h, ABCDEF_H, 3);
+ zero += strncmp (abcdef_h, ABCDEF_H, 4);
+ zero += strncmp (abcdef_h, ABCDEF_H, 5);
+ zero += strncmp (abcdef_h, ABCDEF_H, 6);
+ zero += strncmp (abcdef_h, ABCDEF_H, 7);
+ zero += strncmp (abcdef_h, ABCDEF_H, 8);
+ zero += strncmp (abcdef_h, ABCDEF_H, 9);
+
+ if (zero != 0)
+ __builtin_abort ();
+
+ int neg = 0;
+
+ neg -= strncmp (ab_d, ABCDEF_H, 3) < 0;
+ neg -= strncmp (ab_d, ABCDEF_H, 4) < 0;
+ neg -= strncmp (ab_d, ABCDEF_H, 5) < 0;
+ neg -= strncmp (ab_d, ABCDEF_H, 6) < 0;
+ neg -= strncmp (ab_d, ABCDEF_H, 7) < 0;
+ neg -= strncmp (ab_d, ABCDEF_H, 8) < 0;
+ neg -= strncmp (ab_d, ABCDEF_H, 9) < 0;
+
+ if (neg != -7)
+ __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/memcpy-pr95886.c b/gcc/testsuite/gcc.target/i386/memcpy-pr95886.c
new file mode 100644
index 0000000..c0a04d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/memcpy-pr95886.c
@@ -0,0 +1,107 @@
+/* PR middle-end/95886 - suboptimal memcpy with embedded zero bytes
+ { dg-do compile }
+ { dg-options "-O2 -Wall -fdump-rtl-expand" } */
+
+const char a1234567890[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+
+void cpy_123456789 (void *d)
+{
+ /* Expands into:
+ movabsq $578437695752307201, %rax
+ movb $9, 8(%rdi)
+ movq %rax, (%rdi) */
+ __builtin_memcpy (d, a1234567890, 9);
+}
+
+const char a1234567800[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 0 };
+
+void cpy_1234567800 (void *d)
+{
+ /* Expands into:
+ movabsq $578437695752307201, %rax
+ movb $0, 8(%rdi)
+ movq %rax, (%rdi) */
+ __builtin_memcpy (d, a1234567800, 9);
+}
+
+/* { dg-final { scan-rtl-dump-times "const_int 578437695752307201" 2 "expand"} } */
+
+
+const char a0234567890[10] = { 0, 2, 3, 4, 5, 6, 7, 8, 9 };
+
+void cpy_023456789 (void *d)
+{
+ __builtin_memcpy (d, a0234567890, 9);
+}
+
+/* { dg-final { scan-rtl-dump-times "const_int 578437695752307200" 1 "expand"} } */
+
+
+const char a1034567890[10] = { 1, 0, 3, 4, 5, 6, 7, 8, 9 };
+
+void cpy_103456789 (void *d)
+{
+ __builtin_memcpy (d, a1034567890, 9);
+}
+
+/* { dg-final { scan-rtl-dump-times "const_int 578437695752306689" 1 "expand"} } */
+
+
+const char a1204567890[10] = { 1, 2, 0, 4, 5, 6, 7, 8, 9 };
+
+void cpy_120456789 (void *d)
+{
+ __builtin_memcpy (d, a1204567890, 9);
+}
+
+/* { dg-final { scan-rtl-dump-times "const_int 578437695752110593" 1 "expand"} } */
+
+
+const char a1230567890[10] = { 1, 2, 3, 0, 5, 6, 7, 8, 9 };
+
+void cpy_123056789 (void *d)
+{
+ __builtin_memcpy (d, a1230567890, 9);
+}
+
+/* { dg-final { scan-rtl-dump-times "const_int 578437695685198337" 1 "expand"} } */
+
+
+const char a1234067890[10] = { 1, 2, 3, 4, 0, 6, 7, 8, 9 };
+
+void cpy_123406789 (void *d)
+{
+ __builtin_memcpy (d, a1234067890, 9);
+}
+
+/* { dg-final { scan-rtl-dump-times "const_int 578437674277470721" 1 "expand"} } */
+
+
+const char a1234507890[10] = { 1, 2, 3, 4, 5, 0, 7, 8, 9 };
+
+void cpy_123450789 (void *d)
+{
+ __builtin_memcpy (d, a1234507890, 9);
+}
+
+/* { dg-final { scan-rtl-dump-times "const_int 578431098682540545" 1 "expand"} } */
+
+
+const char a1234560890[10] = { 1, 2, 3, 4, 5, 6, 0, 8, 9 };
+
+void cpy_123456089 (void *d)
+{
+ __builtin_memcpy (d, a1234560890, 9);
+}
+
+/* { dg-final { scan-rtl-dump-times "const_int 576467370915332609" 1 "expand"} } */
+
+
+const char a1234567090[10] = { 1, 2, 3, 4, 5, 6, 7, 0, 9 };
+
+void cpy_123456709 (void *d)
+{
+ __builtin_memcpy (d, a1234567090, 9);
+}
+
+/* { dg-final { scan-rtl-dump-times "const_int 1976943448883713" 1 "expand"} } */
diff --git a/gcc/tree.c b/gcc/tree.c
index 9102f8d..6522a08 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -2206,29 +2206,29 @@ build_real_from_int_cst (tree type, const_tree i)
return v;
}
-/* Return a newly constructed STRING_CST node whose value is
- the LEN characters at STR.
+/* Return a newly constructed STRING_CST node whose value is the LEN
+ characters at STR when STR is nonnull, or all zeros otherwise.
Note that for a C string literal, LEN should include the trailing NUL.
The TREE_TYPE is not initialized. */
tree
-build_string (int len, const char *str)
+build_string (unsigned len, const char *str /*= NULL */)
{
- tree s;
- size_t length;
-
/* Do not waste bytes provided by padding of struct tree_string. */
- length = len + offsetof (struct tree_string, str) + 1;
+ unsigned size = len + offsetof (struct tree_string, str) + 1;
- record_node_allocation_statistics (STRING_CST, length);
+ record_node_allocation_statistics (STRING_CST, size);
- s = (tree) ggc_internal_alloc (length);
+ tree s = (tree) ggc_internal_alloc (size);
memset (s, 0, sizeof (struct tree_typed));
TREE_SET_CODE (s, STRING_CST);
TREE_CONSTANT (s) = 1;
TREE_STRING_LENGTH (s) = len;
- memcpy (s->string.str, str, len);
+ if (str)
+ memcpy (s->string.str, str, len);
+ else
+ memset (s->string.str, 0, len);
s->string.str[len] = '\0';
return s;
@@ -11572,12 +11572,12 @@ build_alloca_call_expr (tree size, unsigned int align, HOST_WIDE_INT max_size)
/* Create a new constant string literal of type ELTYPE[SIZE] (or LEN
if SIZE == -1) and return a tree node representing char* pointer to
- it as an ADDR_EXPR (ARRAY_REF (ELTYPE, ...)). The STRING_CST value
- is the LEN bytes at STR (the representation of the string, which may
- be wide). */
+ it as an ADDR_EXPR (ARRAY_REF (ELTYPE, ...)). When STR is nonnull
+ the STRING_CST value is the LEN bytes at STR (the representation
+ of the string, which may be wide). Otherwise it's all zeros. */
tree
-build_string_literal (int len, const char *str,
+build_string_literal (unsigned len, const char *str /* = NULL */,
tree eltype /* = char_type_node */,
unsigned HOST_WIDE_INT size /* = -1 */)
{
diff --git a/gcc/tree.h b/gcc/tree.h
index 866d9ba..8adc28e 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4425,7 +4425,7 @@ extern tree build_one_cst (tree);
extern tree build_minus_one_cst (tree);
extern tree build_all_ones_cst (tree);
extern tree build_zero_cst (tree);
-extern tree build_string (int, const char *);
+extern tree build_string (unsigned, const char * = NULL);
extern tree build_poly_int_cst (tree, const poly_wide_int_ref &);
extern tree build_tree_list (tree, tree CXX_MEM_STAT_INFO);
extern tree build_tree_list_vec (const vec<tree, va_gc> * CXX_MEM_STAT_INFO);
@@ -4456,7 +4456,8 @@ extern tree build_call_expr_internal_loc_array (location_t, enum internal_fn,
extern tree maybe_build_call_expr_loc (location_t, combined_fn, tree,
int, ...);
extern tree build_alloca_call_expr (tree, unsigned int, HOST_WIDE_INT);
-extern tree build_string_literal (int, const char *, tree = char_type_node,
+extern tree build_string_literal (unsigned, const char * = NULL,
+ tree = char_type_node,
unsigned HOST_WIDE_INT = HOST_WIDE_INT_M1U);
/* Construct various nodes representing data types. */