diff options
author | Tamar Christina <tamar.christina@arm.com> | 2021-11-24 06:38:18 +0000 |
---|---|---|
committer | Tamar Christina <tamar.christina@arm.com> | 2021-11-24 06:38:18 +0000 |
commit | 0888d6bbe97e10de0e624f4ab46acc276e5ee1d7 (patch) | |
tree | 73d71b61ff24ac2ea345b2e637fd82daa88271ae /gcc/tree.c | |
parent | d71d019f63ed5d3fdb34579023bafa4dcf323f2c (diff) | |
download | gcc-0888d6bbe97e10de0e624f4ab46acc276e5ee1d7.zip gcc-0888d6bbe97e10de0e624f4ab46acc276e5ee1d7.tar.gz gcc-0888d6bbe97e10de0e624f4ab46acc276e5ee1d7.tar.bz2 |
middle-end: Convert bitclear <imm> + cmp<cc> #0 into cm<cc2> <imm2>
This optimizes the case where a mask Y which fulfills ~Y + 1 == pow2 is used to
clear some bits and the result is then compared against 0, rewriting it into a
form without the masking that compares against a different immediate.
We can do this for all unsigned compares and for signed we can do it for
comparisons of EQ and NE:
(x & (~255)) == 0 becomes x <= 255, which leaves it to the target to
optimally deal with the comparison.
This transformation has to be done in the mid-end because in RTL you don't have
the signs of the comparison operands and if the target needs an immediate this
should be floated outside of the loop.
The RTL loop invariant hoisting is done before split1.
i.e.
void fun1(int32_t *x, int n)
{
for (int i = 0; i < (n & -16); i++)
x[i] = (x[i]&(~255)) == 0;
}
now generates:
.L3:
ldr q0, [x0]
cmhs v0.4s, v2.4s, v0.4s
and v0.16b, v1.16b, v0.16b
str q0, [x0], 16
cmp x0, x1
bne .L3
and floats the immediate out of the loop.
instead of:
.L3:
ldr q0, [x0]
bic v0.4s, #255
cmeq v0.4s, v0.4s, #0
and v0.16b, v1.16b, v0.16b
str q0, [x0], 16
cmp x0, x1
bne .L3
In order to not break IVopts and CSE I have added a
requirement for the scalar version to be single use.
gcc/ChangeLog:
* tree.c (bitmask_inv_cst_vector_p): New.
* tree.h (bitmask_inv_cst_vector_p): New.
* match.pd: Use it in new bitmask compare pattern.
gcc/testsuite/ChangeLog:
* gcc.dg/bic-bitmask-10.c: New test.
* gcc.dg/bic-bitmask-11.c: New test.
* gcc.dg/bic-bitmask-12.c: New test.
* gcc.dg/bic-bitmask-13.c: New test.
* gcc.dg/bic-bitmask-14.c: New test.
* gcc.dg/bic-bitmask-15.c: New test.
* gcc.dg/bic-bitmask-16.c: New test.
* gcc.dg/bic-bitmask-17.c: New test.
* gcc.dg/bic-bitmask-18.c: New test.
* gcc.dg/bic-bitmask-19.c: New test.
* gcc.dg/bic-bitmask-2.c: New test.
* gcc.dg/bic-bitmask-20.c: New test.
* gcc.dg/bic-bitmask-21.c: New test.
* gcc.dg/bic-bitmask-22.c: New test.
* gcc.dg/bic-bitmask-23.c: New test.
* gcc.dg/bic-bitmask-3.c: New test.
* gcc.dg/bic-bitmask-4.c: New test.
* gcc.dg/bic-bitmask-5.c: New test.
* gcc.dg/bic-bitmask-6.c: New test.
* gcc.dg/bic-bitmask-7.c: New test.
* gcc.dg/bic-bitmask-8.c: New test.
* gcc.dg/bic-bitmask-9.c: New test.
* gcc.dg/bic-bitmask.h: New test.
* gcc.target/aarch64/bic-bitmask-1.c: New test.
Diffstat (limited to 'gcc/tree.c')
-rw-r--r-- | gcc/tree.c | 53 |
1 files changed, 53 insertions, 0 deletions
@@ -10273,6 +10273,59 @@ uniform_integer_cst_p (tree t) return NULL_TREE; } +/* Return the bitwise inverse of T, built in the unsigned variant of T's type, when T is an INTEGER_CST or a constant integer vector and every element E satisfies popcount (~E + 1) == 1, i.e. ~E + 1 is a power of two. A uniform T yields a uniform constant built from the first element; otherwise each element is checked and inverted individually. Return NULL_TREE if T's type is neither integral nor an integer vector, if VECTOR_CST_NELTS (T) is not constant, or if any element fails the check. Anything that is not an INTEGER_CST is accessed as a VECTOR_CST. */ + +tree +bitmask_inv_cst_vector_p (tree t) +{ + + tree_code code = TREE_CODE (t); + tree type = TREE_TYPE (t); + + if (!INTEGRAL_TYPE_P (type) + && !VECTOR_INTEGER_TYPE_P (type)) + return NULL_TREE; + + unsigned HOST_WIDE_INT nelts = 1; + tree cst; + unsigned int idx = 0; + bool uniform = uniform_integer_cst_p (t); + tree newtype = unsigned_type_for (type); + tree_vector_builder builder; + if (code == INTEGER_CST) + cst = t; + else + { + if (!VECTOR_CST_NELTS (t).is_constant (&nelts)) + return NULL_TREE; + + cst = vector_cst_elt (t, 0); + builder.new_vector (newtype, nelts, 1); + } + + tree ty = unsigned_type_for (TREE_TYPE (cst)); + + do { + if (idx > 0) + cst = vector_cst_elt (t, idx); + wide_int icst = wi::to_wide (cst); + wide_int inv = wi::bit_not (icst); + icst = wi::add (1, inv); + if (wi::popcount (icst) != 1) + return NULL_TREE; + + tree newcst = wide_int_to_tree (ty, inv); + + if (uniform) + return build_uniform_cst (newtype, newcst); + + builder.quick_push (newcst); + } while (++idx < nelts); + + return builder.build (); +} + /* If VECTOR_CST T has a single nonzero element, return the index of that element, otherwise return -1. */ |