aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Tamar Christina <tamar.christina@arm.com> 2021-10-20 17:09:00 +0100
committer: Tamar Christina <tamar.christina@arm.com> 2021-10-20 17:09:00 +0100
commit: ea464fd2d4fc093fe723466e3d14524a967cefdc (patch)
tree: 01b34245eb9926e79cad7d2a8496fdfac3ff7223 /gcc
parent: 41812e5e35e231c500468aa1ca779f7c703dc1a3 (diff)
download: gcc-ea464fd2d4fc093fe723466e3d14524a967cefdc.zip
gcc-ea464fd2d4fc093fe723466e3d14524a967cefdc.tar.gz
gcc-ea464fd2d4fc093fe723466e3d14524a967cefdc.tar.bz2
AArch64: Add pattern for sshr to cmlt
This optimizes a signed right shift by BITSIZE-1 into a cmlt operation, which
is preferable because compares generally have a higher throughput than shifts.
On AArch64 the result of the shift would have been either -1 or 0, which is
exactly the result of the compare.

i.e.

void e (int * restrict a, int *b, int n)
{
    for (int i = 0; i < n; i++)
      b[i] = a[i] >> 31;
}

now generates:

.L4:
        ldr     q0, [x0, x3]
        cmlt    v0.4s, v0.4s, #0
        str     q0, [x1, x3]
        add     x3, x3, 16
        cmp     x4, x3
        bne     .L4

instead of:

.L4:
        ldr     q0, [x0, x3]
        sshr    v0.4s, v0.4s, 31
        str     q0, [x1, x3]
        add     x3, x3, 16
        cmp     x4, x3
        bne     .L4

Thanks,
Tamar

gcc/ChangeLog:

        * config/aarch64/aarch64-simd.md (aarch64_simd_ashr<mode>): Add cmp case.
        * config/aarch64/constraints.md (D1): New.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/shl-combine-2.c: New test.
        * gcc.target/aarch64/shl-combine-3.c: New test.
        * gcc.target/aarch64/shl-combine-4.c: New test.
        * gcc.target/aarch64/shl-combine-5.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/aarch64/aarch64-simd.md 12
-rw-r--r-- gcc/config/aarch64/constraints.md 8
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/shl-combine-2.c 14
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/shl-combine-3.c 14
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/shl-combine-4.c 14
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/shl-combine-5.c 14
6 files changed, 71 insertions, 5 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 7f0888e..0b340b4 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1127,12 +1127,14 @@
)
(define_insn "aarch64_simd_ashr<mode>"
- [(set (match_operand:VDQ_I 0 "register_operand" "=w")
- (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
- (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
+ (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w")
+ (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "D1,Dr")))]
"TARGET_SIMD"
- "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
- [(set_attr "type" "neon_shift_imm<q>")]
+ "@
+ cmlt\t%0.<Vtype>, %1.<Vtype>, #0
+ sshr\t%0.<Vtype>, %1.<Vtype>, %2"
+ [(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
)
(define_insn "*aarch64_simd_sra<mode>"
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 3b49b45..1863081 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -437,6 +437,14 @@
(match_test "aarch64_simd_shift_imm_p (op, GET_MODE (op),
true)")))
+(define_constraint "D1"
+ "@internal
+ A constraint that matches vector of immediates that is bits(mode)-1."
+ (and (match_code "const,const_vector")
+ (match_test "aarch64_const_vec_all_same_in_range_p (op,
+ GET_MODE_UNIT_BITSIZE (mode) - 1,
+ GET_MODE_UNIT_BITSIZE (mode) - 1)")))
+
(define_constraint "Dr"
"@internal
A constraint that matches vector of immediates for right shifts."
diff --git a/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c b/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c
new file mode 100644
index 0000000..6a0331f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/shl-combine-2.c
@@ -0,0 +1,14 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
+
+#define TYPE char
+
+void e (signed TYPE * restrict a, signed TYPE *b, int n)
+{
+ for (int i = 0; i < n; i++)
+ b[i] = a[i] >> (sizeof(TYPE)*8)-1;
+}
+
+/* { dg-final { scan-assembler-times {\tcmlt\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tsshr\t} } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/shl-combine-3.c b/gcc/testsuite/gcc.target/aarch64/shl-combine-3.c
new file mode 100644
index 0000000..2086b24a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/shl-combine-3.c
@@ -0,0 +1,14 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
+
+#define TYPE short
+
+void e (signed TYPE * restrict a, signed TYPE *b, int n)
+{
+ for (int i = 0; i < n; i++)
+ b[i] = a[i] >> (sizeof(TYPE)*8)-1;
+}
+
+/* { dg-final { scan-assembler-times {\tcmlt\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tsshr\t} } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/shl-combine-4.c b/gcc/testsuite/gcc.target/aarch64/shl-combine-4.c
new file mode 100644
index 0000000..0831810
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/shl-combine-4.c
@@ -0,0 +1,14 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
+
+#define TYPE int
+
+void e (signed TYPE * restrict a, signed TYPE *b, int n)
+{
+ for (int i = 0; i < n; i++)
+ b[i] = a[i] >> (sizeof(TYPE)*8)-1;
+}
+
+/* { dg-final { scan-assembler-times {\tcmlt\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tsshr\t} } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/shl-combine-5.c b/gcc/testsuite/gcc.target/aarch64/shl-combine-5.c
new file mode 100644
index 0000000..6b2a6bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/shl-combine-5.c
@@ -0,0 +1,14 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps --param=vect-epilogues-nomask=0" } */
+
+#define TYPE long
+
+void e (signed TYPE * restrict a, signed TYPE *b, int n)
+{
+ for (int i = 0; i < n; i++)
+ b[i] = a[i] >> (sizeof(TYPE)*8)-1;
+}
+
+/* { dg-final { scan-assembler-times {\tcmlt\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tsshr\t} } } */
+