aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog14
-rw-r--r--gcc/builtins.c193
-rw-r--r--gcc/doc/invoke.texi5
-rw-r--r--gcc/params.def4
-rw-r--r--gcc/testsuite/ChangeLog6
-rw-r--r--gcc/testsuite/gcc.dg/strcmpopt_5.c80
-rw-r--r--gcc/testsuite/gcc.dg/strcmpopt_6.c36
7 files changed, 321 insertions, 17 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 590efb9..6b776e1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+2018-07-13 Qing Zhao <qing.zhao@oracle.com>
+
+ PR middle-end/78809
+ * builtins.c (expand_builtin_memcmp): Inline the calls first
+ when result_eq is false.
+ (expand_builtin_strcmp): Inline the calls first.
+ (expand_builtin_strncmp): Likewise.
+ (inline_string_cmp): New routine. Expand a string compare
+ call by using a sequence of char comparison.
+ (inline_expand_builtin_string_cmp): New routine. Inline expansion
+ a call to str(n)cmp/memcmp.
+ * doc/invoke.texi (--param builtin-string-cmp-inline-length): New option.
+ * params.def (BUILTIN_STRING_CMP_INLINE_LENGTH): New.
+
2018-07-13 Richard Earnshaw <rearnsha@arm.com>
* config/arm/driver-arm.c: Include arm-native.h.
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 820d6c2..839a818 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see
#include "memmodel.h"
#include "gimple.h"
#include "predict.h"
+#include "params.h"
#include "tm_p.h"
#include "stringpool.h"
#include "tree-vrp.h"
@@ -118,6 +119,7 @@ static rtx expand_builtin_next_arg (void);
static rtx expand_builtin_va_start (tree);
static rtx expand_builtin_va_end (tree);
static rtx expand_builtin_va_copy (tree);
+static rtx inline_expand_builtin_string_cmp (tree, rtx, bool);
static rtx expand_builtin_strcmp (tree, rtx);
static rtx expand_builtin_strncmp (tree, rtx, machine_mode);
static rtx builtin_memcpy_read_str (void *, HOST_WIDE_INT, scalar_int_mode);
@@ -4443,19 +4445,32 @@ expand_builtin_memcmp (tree exp, rtx target, bool result_eq)
tree arg1 = CALL_EXPR_ARG (exp, 0);
tree arg2 = CALL_EXPR_ARG (exp, 1);
tree len = CALL_EXPR_ARG (exp, 2);
+ enum built_in_function fcode = DECL_FUNCTION_CODE (get_callee_fndecl (exp));
+ bool no_overflow = true;
/* Diagnose calls where the specified length exceeds the size of either
object. */
- if (warn_stringop_overflow)
+ tree size = compute_objsize (arg1, 0);
+ no_overflow = check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE,
+ len, /*maxread=*/NULL_TREE, size,
+ /*objsize=*/NULL_TREE);
+ if (no_overflow)
+ {
+ size = compute_objsize (arg2, 0);
+ no_overflow = check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE,
+ len, /*maxread=*/NULL_TREE, size,
+ /*objsize=*/NULL_TREE);
+ }
+
+ /* Due to the performance benefit, always inline the calls first
+ when result_eq is false. */
+ rtx result = NULL_RTX;
+
+ if (!result_eq && fcode != BUILT_IN_BCMP && no_overflow)
{
- tree size = compute_objsize (arg1, 0);
- if (check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE, len,
- /*maxread=*/NULL_TREE, size, /*objsize=*/NULL_TREE))
- {
- size = compute_objsize (arg2, 0);
- check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE, len,
- /*maxread=*/NULL_TREE, size, /*objsize=*/NULL_TREE);
- }
+ result = inline_expand_builtin_string_cmp (exp, target, true);
+ if (result)
+ return result;
}
machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
@@ -4497,10 +4512,10 @@ expand_builtin_memcmp (tree exp, rtx target, bool result_eq)
&& (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1)
constfn = builtin_memcpy_read_str;
- rtx result = emit_block_cmp_hints (arg1_rtx, arg2_rtx, len_rtx,
- TREE_TYPE (len), target,
- result_eq, constfn,
- CONST_CAST (char *, src_str));
+ result = emit_block_cmp_hints (arg1_rtx, arg2_rtx, len_rtx,
+ TREE_TYPE (len), target,
+ result_eq, constfn,
+ CONST_CAST (char *, src_str));
if (result)
{
@@ -4530,6 +4545,12 @@ expand_builtin_strcmp (tree exp, ATTRIBUTE_UNUSED rtx target)
if (!validate_arglist (exp, POINTER_TYPE, POINTER_TYPE, VOID_TYPE))
return NULL_RTX;
+ /* Due to the performance benefit, always inline the calls first. */
+ rtx result = NULL_RTX;
+ result = inline_expand_builtin_string_cmp (exp, target, false);
+ if (result)
+ return result;
+
insn_code cmpstr_icode = direct_optab_handler (cmpstr_optab, SImode);
insn_code cmpstrn_icode = direct_optab_handler (cmpstrn_optab, SImode);
if (cmpstr_icode == CODE_FOR_nothing && cmpstrn_icode == CODE_FOR_nothing)
@@ -4552,7 +4573,6 @@ expand_builtin_strcmp (tree exp, ATTRIBUTE_UNUSED rtx target)
rtx arg1_rtx = get_memory_rtx (arg1, NULL);
rtx arg2_rtx = get_memory_rtx (arg2, NULL);
- rtx result = NULL_RTX;
/* Try to call cmpstrsi. */
if (cmpstr_icode != CODE_FOR_nothing)
result = expand_cmpstr (cmpstr_icode, target, arg1_rtx, arg2_rtx,
@@ -4644,6 +4664,12 @@ expand_builtin_strncmp (tree exp, ATTRIBUTE_UNUSED rtx target,
POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
return NULL_RTX;
+ /* Due to the performance benefit, always inline the calls first. */
+ rtx result = NULL_RTX;
+ result = inline_expand_builtin_string_cmp (exp, target, false);
+ if (result)
+ return result;
+
/* If c_strlen can determine an expression for one of the string
lengths, and it doesn't have side effects, then emit cmpstrnsi
using length MIN(strlen(string)+1, arg3). */
@@ -4706,9 +4732,9 @@ expand_builtin_strncmp (tree exp, ATTRIBUTE_UNUSED rtx target,
rtx arg1_rtx = get_memory_rtx (arg1, len);
rtx arg2_rtx = get_memory_rtx (arg2, len);
rtx arg3_rtx = expand_normal (len);
- rtx result = expand_cmpstrn_or_cmpmem (cmpstrn_icode, target, arg1_rtx,
- arg2_rtx, TREE_TYPE (len), arg3_rtx,
- MIN (arg1_align, arg2_align));
+ result = expand_cmpstrn_or_cmpmem (cmpstrn_icode, target, arg1_rtx,
+ arg2_rtx, TREE_TYPE (len), arg3_rtx,
+ MIN (arg1_align, arg2_align));
tree fndecl = get_callee_fndecl (exp);
if (result)
@@ -6722,6 +6748,139 @@ expand_builtin_goacc_parlevel_id_size (tree exp, rtx target, int ignore)
return target;
}
+/* Expand a string compare operation using a sequence of char comparison
+ to get rid of the calling overhead, with result going to TARGET if
+ that's convenient.
+
+ VAR_STR is the variable string source;
+ CONST_STR is the constant string source;
+ LENGTH is the number of chars to compare;
+ CONST_STR_N indicates which source string is the constant string;
+ IS_MEMCMP indicates whether it's a memcmp or strcmp.
+
+ to: (assume const_str_n is 2, i.e., arg2 is a constant string)
+
+ target = var_str[0] - const_str[0];
+ if (target != 0)
+ goto ne_label;
+ ...
+ target = var_str[length - 2] - const_str[length - 2];
+ if (target != 0)
+ goto ne_label;
+ target = var_str[length - 1] - const_str[length - 1];
+ ne_label:
+ */
+
+static rtx
+inline_string_cmp (rtx target, tree var_str, const char* const_str,
+ unsigned HOST_WIDE_INT length,
+ int const_str_n, machine_mode mode,
+ bool is_memcmp)
+{
+ HOST_WIDE_INT offset = 0;
+ rtx var_rtx_array
+ = get_memory_rtx (var_str, build_int_cst (unsigned_type_node,length));
+ rtx var_rtx = NULL_RTX;
+ rtx const_rtx = NULL_RTX;
+ rtx result = target ? target : gen_reg_rtx (mode);
+ rtx_code_label *ne_label = gen_label_rtx ();
+ tree unit_type_node = is_memcmp ? unsigned_char_type_node : char_type_node;
+
+ start_sequence ();
+
+ for (unsigned HOST_WIDE_INT i = 0; i < length; i++)
+ {
+ var_rtx
+ = adjust_address (var_rtx_array, TYPE_MODE (unit_type_node), offset);
+ const_rtx
+ = builtin_memcpy_read_str (CONST_CAST (char *, const_str),
+ offset,
+ as_a <scalar_int_mode>
+ TYPE_MODE (unit_type_node));
+ rtx op0 = (const_str_n == 1) ? const_rtx : var_rtx;
+ rtx op1 = (const_str_n == 1) ? var_rtx : const_rtx;
+
+ result = expand_simple_binop (mode, MINUS, op0, op1,
+ result, is_memcmp ? 1 : 0, OPTAB_WIDEN);
+ if (i < length - 1)
+ emit_cmp_and_jump_insns (result, CONST0_RTX (mode), NE, NULL_RTX,
+ mode, true, ne_label);
+ offset
+ += GET_MODE_SIZE (as_a <scalar_int_mode> TYPE_MODE (unit_type_node));
+ }
+
+ emit_label (ne_label);
+ rtx_insn *insns = get_insns ();
+ end_sequence ();
+ emit_insn (insns);
+
+ return result;
+}
+
+/* Inline expansion a call to str(n)cmp, with result going to
+ TARGET if that's convenient.
+ If the call is not been inlined, return NULL_RTX. */
+static rtx
+inline_expand_builtin_string_cmp (tree exp, rtx target, bool is_memcmp)
+{
+ tree fndecl = get_callee_fndecl (exp);
+ enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl);
+ unsigned HOST_WIDE_INT length = 0;
+ bool is_ncmp = (fcode == BUILT_IN_STRNCMP || fcode == BUILT_IN_MEMCMP);
+
+ gcc_checking_assert (fcode == BUILT_IN_STRCMP
+ || fcode == BUILT_IN_STRNCMP
+ || fcode == BUILT_IN_MEMCMP);
+
+ tree arg1 = CALL_EXPR_ARG (exp, 0);
+ tree arg2 = CALL_EXPR_ARG (exp, 1);
+ tree len3_tree = is_ncmp ? CALL_EXPR_ARG (exp, 2) : NULL_TREE;
+
+ unsigned HOST_WIDE_INT len1 = 0;
+ unsigned HOST_WIDE_INT len2 = 0;
+ unsigned HOST_WIDE_INT len3 = 0;
+
+ const char *src_str1 = c_getstr (arg1, &len1);
+ const char *src_str2 = c_getstr (arg2, &len2);
+
+ /* If neither strings is constant string, the call is not qualify. */
+ if (!src_str1 && !src_str2)
+ return NULL_RTX;
+
+ /* For strncmp, if the length is not a const, not qualify. */
+ if (is_ncmp && !tree_fits_uhwi_p (len3_tree))
+ return NULL_RTX;
+
+ int const_str_n = 0;
+ if (!len1)
+ const_str_n = 2;
+ else if (!len2)
+ const_str_n = 1;
+ else if (len2 > len1)
+ const_str_n = 1;
+ else
+ const_str_n = 2;
+
+ gcc_checking_assert (const_str_n > 0);
+ length = (const_str_n == 1) ? len1 : len2;
+
+ if (is_ncmp && (len3 = tree_to_uhwi (len3_tree)) < length)
+ length = len3;
+
+ /* If the length of the comparision is larger than the threshold,
+ do nothing. */
+ if (length > (unsigned HOST_WIDE_INT)
+ PARAM_VALUE (BUILTIN_STRING_CMP_INLINE_LENGTH))
+ return NULL_RTX;
+
+ machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
+
+ /* Now, start inline expansion the call. */
+ return inline_string_cmp (target, (const_str_n == 1) ? arg2 : arg1,
+ (const_str_n == 1) ? src_str1 : src_str2, length,
+ const_str_n, mode, is_memcmp);
+}
+
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e0e59f6..9804808 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -10594,6 +10594,11 @@ Control the probability of the expression having the specified value. This
parameter takes a percentage (i.e. 0 ... 100) as input.
The default probability of 90 is obtained empirically.
+@item builtin-string-cmp-inline-length
+The maximum length of a constant string for a builtin string cmp call
+eligible for inlining.
+The default value is 3.
+
@item align-threshold
Select fraction of the maximal frequency of executions of a basic block in
diff --git a/gcc/params.def b/gcc/params.def
index a3906c2..df3a06b 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -446,6 +446,10 @@ DEFPARAM(BUILTIN_EXPECT_PROBABILITY,
"builtin-expect-probability",
"Set the estimated probability in percentage for builtin expect. The default value is 90% probability.",
90, 0, 100)
+DEFPARAM(BUILTIN_STRING_CMP_INLINE_LENGTH,
+ "builtin-string-cmp-inline-length",
+ "The maximum length of a constant string for a builtin string cmp call eligible for inlining. The default value is 3.",
+ 3, 0, 100)
DEFPARAM(TRACER_DYNAMIC_COVERAGE_FEEDBACK,
"tracer-dynamic-coverage-feedback",
"The percentage of function, weighted by execution frequency, that must be covered by trace formation. Used when profile feedback is available.",
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 7b9627f..0c61e97 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2018-07-13 Qing Zhao <qing.zhao@oracle.com>
+
+ PR middle-end/78809
+ * gcc.dg/strcmpopt_5.c: New test.
+ * gcc.dg/strcmpopt_6.c: New test.
+
2018-07-13 Richard Biener <rguenther@suse.de>
PR middle-end/85974
diff --git a/gcc/testsuite/gcc.dg/strcmpopt_5.c b/gcc/testsuite/gcc.dg/strcmpopt_5.c
new file mode 100644
index 0000000..c30fb78
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strcmpopt_5.c
@@ -0,0 +1,80 @@
+/* { dg-do run } */
+/* { dg-options "-O -fdump-rtl-expand" } */
+
+typedef struct { char s[8]; int x; } S;
+__attribute__ ((noinline)) int
+f1 (S * s)
+{
+ int result = 0;
+ result += __builtin_strncmp (s->s, "ab", 2);
+ result += __builtin_strncmp (s->s, "abc", 3);
+ return result;
+}
+
+__attribute__ ((noinline)) int
+f2 (char *p)
+{
+ int result = 0;
+ result += __builtin_strncmp (p, "ab", 2);
+ result += __builtin_strncmp (p, "abc", 3);
+ return result;
+}
+
+__attribute__ ((noinline)) int
+f3 (S * s)
+{
+ int result = 0;
+ result += __builtin_strcmp (s->s, "a");
+ result += __builtin_strcmp (s->s, "ab");
+ return result;
+}
+
+__attribute__ ((noinline)) int
+f4 (char *p)
+{
+ int result = 0;
+ result += __builtin_strcmp (p, "a");
+ result += __builtin_strcmp (p, "ab");
+ return result;
+}
+
+__attribute__ ((noinline)) int
+f5 (S * s)
+{
+ int result = 0;
+ result += __builtin_memcmp (s->s, "ab", 2);
+ result += __builtin_memcmp (s->s, "abc", 3);
+ return result;
+}
+
+__attribute__ ((noinline)) int
+f6 (char *p)
+{
+ int result = 0;
+ result += __builtin_memcmp (p, "ab", 2);
+ result += __builtin_memcmp (p, "abc", 3);
+ return result;
+}
+
+int main (void)
+{
+ S ss = {{'a','b','c'}, 2};
+ char *s = "abcd";
+
+ if (f1 (&ss) != 0 || f2 (s) != 0)
+ __builtin_abort ();
+
+ if (f3 (&ss) <= 0 || f4 (s) <= 0)
+ __builtin_abort ();
+
+ if (f5 (&ss) != 0 || f6 (s) != 0)
+ __builtin_abort ();
+
+ return 0;
+
+}
+
+
+/* { dg-final { scan-rtl-dump-times "__builtin_strcmp" 0 "expand" } } */
+/* { dg-final { scan-rtl-dump-times "__builtin_strncmp" 0 "expand" } } */
+/* { dg-final { scan-rtl-dump-times "__builtin_memcmp" 0 "expand" } } */
diff --git a/gcc/testsuite/gcc.dg/strcmpopt_6.c b/gcc/testsuite/gcc.dg/strcmpopt_6.c
new file mode 100644
index 0000000..0f8cf87
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strcmpopt_6.c
@@ -0,0 +1,36 @@
+/* When the specified length exceeds one of the arguments of the call to memcmp,
+ the call to memcmp should NOT be inlined. */
+/* { dg-do run } */
+/* { dg-options "-O -fdump-rtl-expand -Wno-stringop-overflow" } */
+
+typedef struct { char s[8]; int x; } S;
+
+__attribute__ ((noinline)) int
+f1 (S * s)
+{
+ int result = 0;
+ result += __builtin_memcmp (s->s, "a", 3);
+ return result;
+}
+
+__attribute__ ((noinline)) int
+f2 (char *p)
+{
+ int result = 0;
+ result += __builtin_memcmp (p, "a", 3);
+ return result;
+}
+
+int main (void)
+{
+ S ss = {{'a','b','c'}, 2};
+ char *s = "abcd";
+
+ if (f1 (&ss) < 0 || f2 (s) < 0)
+ __builtin_abort ();
+
+ return 0;
+
+}
+
+/* { dg-final { scan-rtl-dump-times "__builtin_memcmp" 4 "expand" } } */