diff options
-rw-r--r-- | gcc/ChangeLog | 14 | ||||
-rw-r--r-- | gcc/builtins.c | 193 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 5 | ||||
-rw-r--r-- | gcc/params.def | 4 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/strcmpopt_5.c | 80 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/strcmpopt_6.c | 36 |
7 files changed, 321 insertions, 17 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 590efb9..6b776e1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2018-07-13 Qing Zhao <qing.zhao@oracle.com> + + PR middle-end/78809 + * builtins.c (expand_builtin_memcmp): Inline the calls first + when result_eq is false. + (expand_builtin_strcmp): Inline the calls first. + (expand_builtin_strncmp): Likewise. + (inline_string_cmp): New routine. Expand a string compare + call by using a sequence of char comparison. + (inline_expand_builtin_string_cmp): New routine. Inline expansion + a call to str(n)cmp/memcmp. + * doc/invoke.texi (--param builtin-string-cmp-inline-length): New option. + * params.def (BUILTIN_STRING_CMP_INLINE_LENGTH): New. + 2018-07-13 Richard Earnshaw <rearnsha@arm.com> * config/arm/driver-arm.c: Include arm-native.h. diff --git a/gcc/builtins.c b/gcc/builtins.c index 820d6c2..839a818 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see #include "memmodel.h" #include "gimple.h" #include "predict.h" +#include "params.h" #include "tm_p.h" #include "stringpool.h" #include "tree-vrp.h" @@ -118,6 +119,7 @@ static rtx expand_builtin_next_arg (void); static rtx expand_builtin_va_start (tree); static rtx expand_builtin_va_end (tree); static rtx expand_builtin_va_copy (tree); +static rtx inline_expand_builtin_string_cmp (tree, rtx, bool); static rtx expand_builtin_strcmp (tree, rtx); static rtx expand_builtin_strncmp (tree, rtx, machine_mode); static rtx builtin_memcpy_read_str (void *, HOST_WIDE_INT, scalar_int_mode); @@ -4443,19 +4445,32 @@ expand_builtin_memcmp (tree exp, rtx target, bool result_eq) tree arg1 = CALL_EXPR_ARG (exp, 0); tree arg2 = CALL_EXPR_ARG (exp, 1); tree len = CALL_EXPR_ARG (exp, 2); + enum built_in_function fcode = DECL_FUNCTION_CODE (get_callee_fndecl (exp)); + bool no_overflow = true; /* Diagnose calls where the specified length exceeds the size of either object. */ - if (warn_stringop_overflow) + tree size = compute_objsize (arg1, 0); + no_overflow = check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE, + len, /*maxread=*/NULL_TREE, size, + /*objsize=*/NULL_TREE); + if (no_overflow) + { + size = compute_objsize (arg2, 0); + no_overflow = check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE, + len, /*maxread=*/NULL_TREE, size, + /*objsize=*/NULL_TREE); + } + + /* Due to the performance benefit, always inline the calls first + when result_eq is false. */ + rtx result = NULL_RTX; + + if (!result_eq && fcode != BUILT_IN_BCMP && no_overflow) { - tree size = compute_objsize (arg1, 0); - if (check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE, len, - /*maxread=*/NULL_TREE, size, /*objsize=*/NULL_TREE)) - { - size = compute_objsize (arg2, 0); - check_access (exp, /*dst=*/NULL_TREE, /*src=*/NULL_TREE, len, - /*maxread=*/NULL_TREE, size, /*objsize=*/NULL_TREE); - } + result = inline_expand_builtin_string_cmp (exp, target, true); + if (result) + return result; } machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); @@ -4497,10 +4512,10 @@ expand_builtin_memcmp (tree exp, rtx target, bool result_eq) && (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1) constfn = builtin_memcpy_read_str; - rtx result = emit_block_cmp_hints (arg1_rtx, arg2_rtx, len_rtx, - TREE_TYPE (len), target, - result_eq, constfn, - CONST_CAST (char *, src_str)); + result = emit_block_cmp_hints (arg1_rtx, arg2_rtx, len_rtx, + TREE_TYPE (len), target, + result_eq, constfn, + CONST_CAST (char *, src_str)); if (result) { @@ -4530,6 +4545,12 @@ expand_builtin_strcmp (tree exp, ATTRIBUTE_UNUSED rtx target) if (!validate_arglist (exp, POINTER_TYPE, POINTER_TYPE, VOID_TYPE)) return NULL_RTX; + /* Due to the performance benefit, always inline the calls first. */ + rtx result = NULL_RTX; + result = inline_expand_builtin_string_cmp (exp, target, false); + if (result) + return result; + insn_code cmpstr_icode = direct_optab_handler (cmpstr_optab, SImode); insn_code cmpstrn_icode = direct_optab_handler (cmpstrn_optab, SImode); if (cmpstr_icode == CODE_FOR_nothing && cmpstrn_icode == CODE_FOR_nothing) @@ -4552,7 +4573,6 @@ expand_builtin_strcmp (tree exp, ATTRIBUTE_UNUSED rtx target) rtx arg1_rtx = get_memory_rtx (arg1, NULL); rtx arg2_rtx = get_memory_rtx (arg2, NULL); - rtx result = NULL_RTX; /* Try to call cmpstrsi. */ if (cmpstr_icode != CODE_FOR_nothing) result = expand_cmpstr (cmpstr_icode, target, arg1_rtx, arg2_rtx, @@ -4644,6 +4664,12 @@ expand_builtin_strncmp (tree exp, ATTRIBUTE_UNUSED rtx target, POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE)) return NULL_RTX; + /* Due to the performance benefit, always inline the calls first. */ + rtx result = NULL_RTX; + result = inline_expand_builtin_string_cmp (exp, target, false); + if (result) + return result; + /* If c_strlen can determine an expression for one of the string lengths, and it doesn't have side effects, then emit cmpstrnsi using length MIN(strlen(string)+1, arg3). */ @@ -4706,9 +4732,9 @@ expand_builtin_strncmp (tree exp, ATTRIBUTE_UNUSED rtx target, rtx arg1_rtx = get_memory_rtx (arg1, len); rtx arg2_rtx = get_memory_rtx (arg2, len); rtx arg3_rtx = expand_normal (len); - rtx result = expand_cmpstrn_or_cmpmem (cmpstrn_icode, target, arg1_rtx, - arg2_rtx, TREE_TYPE (len), arg3_rtx, - MIN (arg1_align, arg2_align)); + result = expand_cmpstrn_or_cmpmem (cmpstrn_icode, target, arg1_rtx, + arg2_rtx, TREE_TYPE (len), arg3_rtx, + MIN (arg1_align, arg2_align)); tree fndecl = get_callee_fndecl (exp); if (result) @@ -6722,6 +6748,139 @@ expand_builtin_goacc_parlevel_id_size (tree exp, rtx target, int ignore) return target; } +/* Expand a string compare operation using a sequence of char comparison + to get rid of the calling overhead, with result going to TARGET if + that's convenient. + + VAR_STR is the variable string source; + CONST_STR is the constant string source; + LENGTH is the number of chars to compare; + CONST_STR_N indicates which source string is the constant string; + IS_MEMCMP indicates whether it's a memcmp or strcmp. + + to: (assume const_str_n is 2, i.e., arg2 is a constant string) + + target = var_str[0] - const_str[0]; + if (target != 0) + goto ne_label; + ... + target = var_str[length - 2] - const_str[length - 2]; + if (target != 0) + goto ne_label; + target = var_str[length - 1] - const_str[length - 1]; + ne_label: + */ + +static rtx +inline_string_cmp (rtx target, tree var_str, const char* const_str, + unsigned HOST_WIDE_INT length, + int const_str_n, machine_mode mode, + bool is_memcmp) +{ + HOST_WIDE_INT offset = 0; + rtx var_rtx_array + = get_memory_rtx (var_str, build_int_cst (unsigned_type_node,length)); + rtx var_rtx = NULL_RTX; + rtx const_rtx = NULL_RTX; + rtx result = target ? target : gen_reg_rtx (mode); + rtx_code_label *ne_label = gen_label_rtx (); + tree unit_type_node = is_memcmp ? unsigned_char_type_node : char_type_node; + + start_sequence (); + + for (unsigned HOST_WIDE_INT i = 0; i < length; i++) + { + var_rtx + = adjust_address (var_rtx_array, TYPE_MODE (unit_type_node), offset); + const_rtx + = builtin_memcpy_read_str (CONST_CAST (char *, const_str), + offset, + as_a <scalar_int_mode> + TYPE_MODE (unit_type_node)); + rtx op0 = (const_str_n == 1) ? const_rtx : var_rtx; + rtx op1 = (const_str_n == 1) ? var_rtx : const_rtx; + + result = expand_simple_binop (mode, MINUS, op0, op1, + result, is_memcmp ? 1 : 0, OPTAB_WIDEN); + if (i < length - 1) + emit_cmp_and_jump_insns (result, CONST0_RTX (mode), NE, NULL_RTX, + mode, true, ne_label); + offset + += GET_MODE_SIZE (as_a <scalar_int_mode> TYPE_MODE (unit_type_node)); + } + + emit_label (ne_label); + rtx_insn *insns = get_insns (); + end_sequence (); + emit_insn (insns); + + return result; +} + +/* Inline expansion a call to str(n)cmp, with result going to + TARGET if that's convenient. + If the call is not been inlined, return NULL_RTX. */ +static rtx +inline_expand_builtin_string_cmp (tree exp, rtx target, bool is_memcmp) +{ + tree fndecl = get_callee_fndecl (exp); + enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl); + unsigned HOST_WIDE_INT length = 0; + bool is_ncmp = (fcode == BUILT_IN_STRNCMP || fcode == BUILT_IN_MEMCMP); + + gcc_checking_assert (fcode == BUILT_IN_STRCMP + || fcode == BUILT_IN_STRNCMP + || fcode == BUILT_IN_MEMCMP); + + tree arg1 = CALL_EXPR_ARG (exp, 0); + tree arg2 = CALL_EXPR_ARG (exp, 1); + tree len3_tree = is_ncmp ? CALL_EXPR_ARG (exp, 2) : NULL_TREE; + + unsigned HOST_WIDE_INT len1 = 0; + unsigned HOST_WIDE_INT len2 = 0; + unsigned HOST_WIDE_INT len3 = 0; + + const char *src_str1 = c_getstr (arg1, &len1); + const char *src_str2 = c_getstr (arg2, &len2); + + /* If neither strings is constant string, the call is not qualify. */ + if (!src_str1 && !src_str2) + return NULL_RTX; + + /* For strncmp, if the length is not a const, not qualify. */ + if (is_ncmp && !tree_fits_uhwi_p (len3_tree)) + return NULL_RTX; + + int const_str_n = 0; + if (!len1) + const_str_n = 2; + else if (!len2) + const_str_n = 1; + else if (len2 > len1) + const_str_n = 1; + else + const_str_n = 2; + + gcc_checking_assert (const_str_n > 0); + length = (const_str_n == 1) ? len1 : len2; + + if (is_ncmp && (len3 = tree_to_uhwi (len3_tree)) < length) + length = len3; + + /* If the length of the comparision is larger than the threshold, + do nothing. */ + if (length > (unsigned HOST_WIDE_INT) + PARAM_VALUE (BUILTIN_STRING_CMP_INLINE_LENGTH)) + return NULL_RTX; + + machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); + + /* Now, start inline expansion the call. */ + return inline_string_cmp (target, (const_str_n == 1) ? arg2 : arg1, + (const_str_n == 1) ? src_str1 : src_str2, length, + const_str_n, mode, is_memcmp); +} + /* Expand an expression EXP that calls a built-in function, with result going to TARGET if that's convenient (and in mode MODE if that's convenient). diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index e0e59f6..9804808 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -10594,6 +10594,11 @@ Control the probability of the expression having the specified value. This parameter takes a percentage (i.e. 0 ... 100) as input. The default probability of 90 is obtained empirically. +@item builtin-string-cmp-inline-length +The maximum length of a constant string for a builtin string cmp call +eligible for inlining. +The default value is 3. + @item align-threshold Select fraction of the maximal frequency of executions of a basic block in diff --git a/gcc/params.def b/gcc/params.def index a3906c2..df3a06b 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -446,6 +446,10 @@ DEFPARAM(BUILTIN_EXPECT_PROBABILITY, "builtin-expect-probability", "Set the estimated probability in percentage for builtin expect. The default value is 90% probability.", 90, 0, 100) +DEFPARAM(BUILTIN_STRING_CMP_INLINE_LENGTH, + "builtin-string-cmp-inline-length", + "The maximum length of a constant string for a builtin string cmp call eligible for inlining. The default value is 3.", + 3, 0, 100) DEFPARAM(TRACER_DYNAMIC_COVERAGE_FEEDBACK, "tracer-dynamic-coverage-feedback", "The percentage of function, weighted by execution frequency, that must be covered by trace formation. Used when profile feedback is available.", diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7b9627f..0c61e97 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2018-07-13 Qing Zhao <qing.zhao@oracle.com> + + PR middle-end/78809 + * gcc.dg/strcmpopt_5.c: New test. + * gcc.dg/strcmpopt_6.c: New test. + 2018-07-13 Richard Biener <rguenther@suse.de> PR middle-end/85974 diff --git a/gcc/testsuite/gcc.dg/strcmpopt_5.c b/gcc/testsuite/gcc.dg/strcmpopt_5.c new file mode 100644 index 0000000..c30fb78 --- /dev/null +++ b/gcc/testsuite/gcc.dg/strcmpopt_5.c @@ -0,0 +1,80 @@ +/* { dg-do run } */ +/* { dg-options "-O -fdump-rtl-expand" } */ + +typedef struct { char s[8]; int x; } S; +__attribute__ ((noinline)) int +f1 (S * s) +{ + int result = 0; + result += __builtin_strncmp (s->s, "ab", 2); + result += __builtin_strncmp (s->s, "abc", 3); + return result; +} + +__attribute__ ((noinline)) int +f2 (char *p) +{ + int result = 0; + result += __builtin_strncmp (p, "ab", 2); + result += __builtin_strncmp (p, "abc", 3); + return result; +} + +__attribute__ ((noinline)) int +f3 (S * s) +{ + int result = 0; + result += __builtin_strcmp (s->s, "a"); + result += __builtin_strcmp (s->s, "ab"); + return result; +} + +__attribute__ ((noinline)) int +f4 (char *p) +{ + int result = 0; + result += __builtin_strcmp (p, "a"); + result += __builtin_strcmp (p, "ab"); + return result; +} + +__attribute__ ((noinline)) int +f5 (S * s) +{ + int result = 0; + result += __builtin_memcmp (s->s, "ab", 2); + result += __builtin_memcmp (s->s, "abc", 3); + return result; +} + +__attribute__ ((noinline)) int +f6 (char *p) +{ + int result = 0; + result += __builtin_memcmp (p, "ab", 2); + result += __builtin_memcmp (p, "abc", 3); + return result; +} + +int main (void) +{ + S ss = {{'a','b','c'}, 2}; + char *s = "abcd"; + + if (f1 (&ss) != 0 || f2 (s) != 0) + __builtin_abort (); + + if (f3 (&ss) <= 0 || f4 (s) <= 0) + __builtin_abort (); + + if (f5 (&ss) != 0 || f6 (s) != 0) + __builtin_abort (); + + return 0; + +} + + +/* { dg-final { scan-rtl-dump-times "__builtin_strcmp" 0 "expand" } } */ +/* { dg-final { scan-rtl-dump-times "__builtin_strncmp" 0 "expand" } } */ +/* { dg-final { scan-rtl-dump-times "__builtin_memcmp" 0 "expand" } } */ diff --git a/gcc/testsuite/gcc.dg/strcmpopt_6.c b/gcc/testsuite/gcc.dg/strcmpopt_6.c new file mode 100644 index 0000000..0f8cf87 --- /dev/null +++ b/gcc/testsuite/gcc.dg/strcmpopt_6.c @@ -0,0 +1,36 @@ +/* When the specified length exceeds one of the arguments of the call to memcmp, + the call to memcmp should NOT be inlined. */ +/* { dg-do run } */ +/* { dg-options "-O -fdump-rtl-expand -Wno-stringop-overflow" } */ + +typedef struct { char s[8]; int x; } S; + +__attribute__ ((noinline)) int +f1 (S * s) +{ + int result = 0; + result += __builtin_memcmp (s->s, "a", 3); + return result; +} + +__attribute__ ((noinline)) int +f2 (char *p) +{ + int result = 0; + result += __builtin_memcmp (p, "a", 3); + return result; +} + +int main (void) +{ + S ss = {{'a','b','c'}, 2}; + char *s = "abcd"; + + if (f1 (&ss) < 0 || f2 (s) < 0) + __builtin_abort (); + + return 0; + +} + +/* { dg-final { scan-rtl-dump-times "__builtin_memcmp" 4 "expand" } } */ |