diff options
author | Tamar Christina <tamar.christina@arm.com> | 2021-10-20 17:09:00 +0100 |
---|---|---|
committer | Tamar Christina <tamar.christina@arm.com> | 2021-10-20 17:09:00 +0100 |
commit | ea464fd2d4fc093fe723466e3d14524a967cefdc (patch) | |
tree | 01b34245eb9926e79cad7d2a8496fdfac3ff7223 /gcc | |
parent | 41812e5e35e231c500468aa1ca779f7c703dc1a3 (diff) | |
download | gcc-ea464fd2d4fc093fe723466e3d14524a967cefdc.zip gcc-ea464fd2d4fc093fe723466e3d14524a967cefdc.tar.gz gcc-ea464fd2d4fc093fe723466e3d14524a967cefdc.tar.bz2 |
AArch64: Add pattern for sshr to cmlt
This optimizes a signed right shift by BITSIZE-1 into a cmlt operation, which is
preferable because compares generally have a higher throughput than shifts.
On AArch64 the result of the shift would have been either -1 or 0, which is
exactly the result of the compare against zero.
e.g.
void e (int * restrict a, int *b, int n)
{
for (int i = 0; i < n; i++)
b[i] = a[i] >> 31;
}
now generates:
.L4:
ldr q0, [x0, x3]
cmlt v0.4s, v0.4s, #0
str q0, [x1, x3]
add x3, x3, 16
cmp x4, x3
bne .L4
instead of:
.L4:
ldr q0, [x0, x3]
sshr v0.4s, v0.4s, 31
str q0, [x1, x3]
add x3, x3, 16
cmp x4, x3
bne .L4
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (aarch64_simd_ashr<mode>): Add cmp
case.
* config/aarch64/constraints.md (D1): New.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/shl-combine-2.c: New test.
* gcc.target/aarch64/shl-combine-3.c: New test.
* gcc.target/aarch64/shl-combine-4.c: New test.
* gcc.target/aarch64/shl-combine-5.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 12 | ||||
-rw-r--r-- | gcc/config/aarch64/constraints.md | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/shl-combine-2.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/shl-combine-3.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/shl-combine-4.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/shl-combine-5.c | 14 |
6 files changed, 71 insertions, 5 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 7f0888e..0b340b4 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1127,12 +1127,14 @@ ) (define_insn "aarch64_simd_ashr<mode>" - [(set (match_operand:VDQ_I 0 "register_operand" "=w") - (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") - (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))] + [(set (match_operand:VDQ_I 0 "register_operand" "=w,w") + (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w") + (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "D1,Dr")))] "TARGET_SIMD" - "sshr\t%0.<Vtype>, %1.<Vtype>, %2" - [(set_attr "type" "neon_shift_imm<q>")] + "@ + cmlt\t%0.<Vtype>, %1.<Vtype>, #0 + sshr\t%0.<Vtype>, %1.<Vtype>, %2" + [(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")] ) (define_insn "*aarch64_simd_sra<mode>" diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index 3b49b45..1863081 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -437,6 +437,14 @@ (match_test "aarch64_simd_shift_imm_p (op, GET_MODE (op), true)"))) +(define_constraint "D1" + "@internal + A constraint that matches vector of immediates that is bits(mode)-1." + (and (match_code "const,const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, + GET_MODE_UNIT_BITSIZE (mode) - 1, + GET_MODE_UNIT_BITSIZE (mode) - 1)"))) + (define_constraint "Dr" "@internal A constraint that matches vector of immediates for right shifts." 
diff --git a/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c b/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c new file mode 100644 index 0000000..6a0331f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c @@ -0,0 +1,14 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */ + +#define TYPE char + +void e (signed TYPE * restrict a, signed TYPE *b, int n) +{ + for (int i = 0; i < n; i++) + b[i] = a[i] >> (sizeof(TYPE)*8)-1; +} + +/* { dg-final { scan-assembler-times {\tcmlt\t} 1 } } */ +/* { dg-final { scan-assembler-not {\tsshr\t} } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/shl-combine-3.c b/gcc/testsuite/gcc.target/aarch64/shl-combine-3.c new file mode 100644 index 0000000..2086b24a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/shl-combine-3.c @@ -0,0 +1,14 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */ + +#define TYPE short + +void e (signed TYPE * restrict a, signed TYPE *b, int n) +{ + for (int i = 0; i < n; i++) + b[i] = a[i] >> (sizeof(TYPE)*8)-1; +} + +/* { dg-final { scan-assembler-times {\tcmlt\t} 1 } } */ +/* { dg-final { scan-assembler-not {\tsshr\t} } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/shl-combine-4.c b/gcc/testsuite/gcc.target/aarch64/shl-combine-4.c new file mode 100644 index 0000000..0831810 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/shl-combine-4.c @@ -0,0 +1,14 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */ + +#define TYPE int + +void e (signed TYPE * restrict a, signed TYPE *b, int n) +{ + for (int i = 0; i < n; i++) + b[i] = a[i] >> (sizeof(TYPE)*8)-1; +} + +/* { dg-final { scan-assembler-times {\tcmlt\t} 1 } } */ +/* { dg-final { scan-assembler-not {\tsshr\t} } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/shl-combine-5.c b/gcc/testsuite/gcc.target/aarch64/shl-combine-5.c new file mode 100644 index 0000000..6b2a6bd --- 
/dev/null +++ b/gcc/testsuite/gcc.target/aarch64/shl-combine-5.c @@ -0,0 +1,14 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */ + +#define TYPE long + +void e (signed TYPE * restrict a, signed TYPE *b, int n) +{ + for (int i = 0; i < n; i++) + b[i] = a[i] >> (sizeof(TYPE)*8)-1; +} + +/* { dg-final { scan-assembler-times {\tcmlt\t} 1 } } */ +/* { dg-final { scan-assembler-not {\tsshr\t} } } */ + |