diff options
author | dzhao.ampere <di.zhao@amperecomputing.com> | 2024-05-10 11:55:18 +0800 |
---|---|---|
committer | Di Zhao <dizhao@os.amperecomputing.com> | 2024-05-11 17:59:12 +0800 |
commit | 1b0919cd147a2b6ccdee2b1217bf0200bdcc87aa (patch) | |
tree | aa44eb31cfc9ffd4a843079a00586d36db086cbb | |
parent | 18c93c65a9fbaaf3762198e78fb3c24b9b6fd9fc (diff) | |
download | gcc-1b0919cd147a2b6ccdee2b1217bf0200bdcc87aa.zip gcc-1b0919cd147a2b6ccdee2b1217bf0200bdcc87aa.tar.gz gcc-1b0919cd147a2b6ccdee2b1217bf0200bdcc87aa.tar.bz2 |
tree-optimization/114760 - check variants of >> and << in loop-niter
When recognizing bit counting idiom, include pattern "x * 2"
for "x << 1", and "x / 2" for "x >> 1" (given x is unsigned).
gcc/ChangeLog:
PR tree-optimization/114760
* tree-ssa-loop-niter.cc (is_lshift_by_1): New function
to check if STMT is equivalent to x << 1.
(is_rshift_by_1): New function to check if STMT is
equivalent to x >> 1.
(number_of_iterations_cltz): Enhance the identification
of logical shift by one.
(number_of_iterations_cltz_complement): Enhance the
identification of logical shift by one.
gcc/testsuite/ChangeLog:
PR tree-optimization/114760
* gcc.dg/tree-ssa/pr114760-1.c: New test.
* gcc.dg/tree-ssa/pr114760-2.c: New test.
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c | 69 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c | 20 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-niter.cc | 56 |
3 files changed, 131 insertions, 14 deletions
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c new file mode 100644 index 0000000..9f10ccc --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-1.c @@ -0,0 +1,69 @@ +/* PR tree-optimization/114760 */ +/* { dg-do compile } */ +/* { dg-require-effective-target clz } */ +/* { dg-require-effective-target ctz } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ + +unsigned +ntz32_1 (unsigned x) +{ + int n = 32; + while (x != 0) + { + n = n - 1; + x = x * 2; + } + return n; +} + +unsigned +ntz32_2 (unsigned x) +{ + int n = 32; + while (x != 0) + { + n = n - 1; + x = x + x; + } + return n; +} + +unsigned +ntz32_3 (unsigned x) +{ + int n = 32; + while (x != 0) + { + n = n - 1; + x = x << 1; + } + return n; +} + +#define PREC (__CHAR_BIT__ * __SIZEOF_INT__) +int +nlz32_1 (unsigned int b) { + int c = PREC; + + while (b != 0) { + b >>= 1; + c --; + } + + return c; +} + +int +nlz32_2 (unsigned int b) { + int c = PREC; + + while (b != 0) { + b /= 2; + c --; + } + + return c; +} + +/* { dg-final { scan-tree-dump-times "__builtin_ctz|\\.CTZ" 3 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 2 "optimized" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c new file mode 100644 index 0000000..e1b4c4b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114760-2.c @@ -0,0 +1,20 @@ +/* PR tree-optimization/114760 */ +/* { dg-do compile } */ +/* { dg-require-effective-target clz } */ +/* { dg-options "-O3 -fdump-tree-optimized" } */ + +// Check that for signed type, there's no CLZ. +#define PREC (__CHAR_BIT__ * __SIZEOF_INT__) +int +no_nlz32 (int b) { + int c = PREC; + + while (b != 0) { + b /= 2; + c --; + } + + return c; +} + +/* { dg-final { scan-tree-dump-not "__builtin_ctz|\\.CLZ" "optimized" } } */
\ No newline at end of file diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc index 0fde07e..92db9c7 100644 --- a/gcc/tree-ssa-loop-niter.cc +++ b/gcc/tree-ssa-loop-niter.cc @@ -2303,6 +2303,38 @@ build_cltz_expr (tree src, bool leading, bool define_at_zero) return call; } +/* Returns true if STMT is equivalent to x << 1. */ + +static bool +is_lshift_by_1 (gassign *stmt) +{ + if (gimple_assign_rhs_code (stmt) == LSHIFT_EXPR + && integer_onep (gimple_assign_rhs2 (stmt))) + return true; + if (gimple_assign_rhs_code (stmt) == MULT_EXPR + && tree_fits_shwi_p (gimple_assign_rhs2 (stmt)) + && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2) + return true; + return false; +} + +/* Returns true if STMT is equivalent to x >> 1. */ + +static bool +is_rshift_by_1 (gassign *stmt) +{ + if (!TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (stmt)))) + return false; + if (gimple_assign_rhs_code (stmt) == RSHIFT_EXPR + && integer_onep (gimple_assign_rhs2 (stmt))) + return true; + if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR + && tree_fits_shwi_p (gimple_assign_rhs2 (stmt)) + && tree_to_shwi (gimple_assign_rhs2 (stmt)) == 2) + return true; + return false; +} + /* See comment below for number_of_iterations_bitcount. For c[lt]z, we have: @@ -2400,14 +2432,12 @@ number_of_iterations_cltz (loop_p loop, edge exit, /* Make sure iv_2_stmt is a logical shift by one stmt: iv_2 = iv_1 {<<|>>} 1 */ - if (!is_gimple_assign (iv_2_stmt) - || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR - && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR - || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt))))) - || !integer_onep (gimple_assign_rhs2 (iv_2_stmt))) + if (!is_gimple_assign (iv_2_stmt)) + return false; + bool left_shift = false; + if (!((left_shift = is_lshift_by_1 (as_a <gassign *> (iv_2_stmt))) + || is_rshift_by_1 (as_a <gassign *> (iv_2_stmt)))) return false; - - bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR); tree iv_1 = gimple_assign_rhs1 (iv_2_stmt); @@ -2516,14 +2546,12 @@ number_of_iterations_cltz_complement (loop_p loop, edge exit, /* Make sure iv_2_stmt is a logical shift by one stmt: iv_2 = iv_1 {>>|<<} 1 */ - if (!is_gimple_assign (iv_2_stmt) - || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR - && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR - || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt))))) - || !integer_onep (gimple_assign_rhs2 (iv_2_stmt))) + if (!is_gimple_assign (iv_2_stmt)) + return false; + bool left_shift = false; + if (!((left_shift = is_lshift_by_1 (as_a <gassign *> (iv_2_stmt))) + || is_rshift_by_1 (as_a <gassign *> (iv_2_stmt)))) return false; - - bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR); tree iv_1 = gimple_assign_rhs1 (iv_2_stmt); |