aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWilco Dijkstra <wilco.dijkstra@arm.com>2024-10-15 16:22:23 +0000
committerWilco Dijkstra <wilco.dijkstra@arm.com>2024-10-23 13:20:01 +0000
commit7c7c895c2f34d2a5c0cd2139c5e76c13c6c030c9 (patch)
tree61d52891bf10fa7a9f2d693fdc8a8aabd65b1d4f
parent2b666dc4d1c96e0ea3597fe7e502a70198a66c03 (diff)
downloadgcc-7c7c895c2f34d2a5c0cd2139c5e76c13c6c030c9.zip
gcc-7c7c895c2f34d2a5c0cd2139c5e76c13c6c030c9.tar.gz
gcc-7c7c895c2f34d2a5c0cd2139c5e76c13c6c030c9.tar.bz2
AArch64: Fix copysign patterns
The current copysign pattern has a mismatch in the predicates and constraints - operand[2] is a register_operand but also has an alternative X which allows any operand.  Since it is a floating point operation, having an integer alternative makes no sense.  Change the expander to always use vector immediates which results in better code and sharing of immediates between copysign and xorsign.

gcc/ChangeLog:
	* config/aarch64/aarch64.md (copysign<GPF:mode>3): Widen immediate to vector.
	(copysign<GPF:mode>3_insn): Use VQ_INT_EQUIV in operand 3.
	* config/aarch64/iterators.md (VQ_INT_EQUIV): New iterator.
	(vq_int_equiv): Likewise.

gcc/testsuite/ChangeLog:
	* gcc.target/aarch64/copysign_3.c: New test.
	* gcc.target/aarch64/copysign_4.c: New test.
	* gcc.target/aarch64/fneg-abs_2.c: Fixup test.
	* gcc.target/aarch64/sve/fneg-abs_2.c: Likewise.
-rw-r--r--gcc/config/aarch64/aarch64.md48
-rw-r--r--gcc/config/aarch64/iterators.md8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/copysign_3.c16
-rw-r--r--gcc/testsuite/gcc.target/aarch64/copysign_4.c17
-rw-r--r--gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c2
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c2
6 files changed, 62 insertions, 31 deletions
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c54b29c..ec9c731 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7218,13 +7218,12 @@
}
)
-;; For copysign (x, y), we want to generate:
+;; For copysignf (x, y), we want to generate:
;;
-;; LDR d2, #(1 << 63)
-;; BSL v2.8b, [y], [x]
+;; movi v31.4s, 0x80, lsl 24
+;; bit v0.16b, v1.16b, v31.16b
;;
-;; or another, equivalent, sequence using one of BSL/BIT/BIF. Because
-;; we expect these operations to nearly always operate on
+;; Because we expect these operations to nearly always operate on
;; floating-point values, we do not want the operation to be
;; simplified into a bit-field insert operation that operates on the
;; integer side, since typically that would involve three inter-bank
@@ -7239,32 +7238,25 @@
(match_operand:GPF 2 "nonmemory_operand")]
"TARGET_SIMD"
{
- rtx signbit_const = GEN_INT (HOST_WIDE_INT_M1U
- << (GET_MODE_BITSIZE (<MODE>mode) - 1));
- /* copysign (x, -1) should instead be expanded as orr with the sign
- bit. */
+ rtx sign = GEN_INT (HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (<MODE>mode) - 1));
+ rtx v_bitmask = gen_const_vec_duplicate (<VQ_INT_EQUIV>mode, sign);
+ v_bitmask = force_reg (<VQ_INT_EQUIV>mode, v_bitmask);
+
+ /* copysign (x, -1) should instead be expanded as orr with the signbit. */
rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
+
if (GET_CODE (op2_elt) == CONST_DOUBLE
&& real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
{
- rtx v_bitmask
- = force_reg (V2<V_INT_EQUIV>mode,
- gen_const_vec_duplicate (V2<V_INT_EQUIV>mode,
- signbit_const));
-
- emit_insn (gen_iorv2<v_int_equiv>3 (
- lowpart_subreg (V2<V_INT_EQUIV>mode, operands[0], <MODE>mode),
- lowpart_subreg (V2<V_INT_EQUIV>mode, operands[1], <MODE>mode),
+ emit_insn (gen_ior<vq_int_equiv>3 (
+ lowpart_subreg (<VQ_INT_EQUIV>mode, operands[0], <MODE>mode),
+ lowpart_subreg (<VQ_INT_EQUIV>mode, operands[1], <MODE>mode),
v_bitmask));
DONE;
}
-
- machine_mode int_mode = <V_INT_EQUIV>mode;
- rtx bitmask = gen_reg_rtx (int_mode);
- emit_move_insn (bitmask, signbit_const);
operands[2] = force_reg (<MODE>mode, operands[2]);
emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
- bitmask));
+ v_bitmask));
DONE;
}
)
@@ -7273,23 +7265,21 @@
[(set (match_operand:GPF 0 "register_operand")
(unspec:GPF [(match_operand:GPF 1 "register_operand")
(match_operand:GPF 2 "register_operand")
- (match_operand:<V_INT_EQUIV> 3 "register_operand")]
+ (match_operand:<VQ_INT_EQUIV> 3 "register_operand")]
UNSPEC_COPYSIGN))]
"TARGET_SIMD"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: type ]
[ w , w , w , 0 ; neon_bsl<q> ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
[ w , 0 , w , w ; neon_bsl<q> ] bit\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
[ w , w , 0 , w ; neon_bsl<q> ] bif\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype>
- [ r , r , 0 , X ; bfm ] bfxil\t%<w1>0, %<w1>1, #0, <sizem1>
}
)
-
-;; For xorsign (x, y), we want to generate:
+;; For xorsignf (x, y), we want to generate:
;;
-;; LDR d2, #1<<63
-;; AND v3.8B, v1.8B, v2.8B
-;; EOR v0.8B, v0.8B, v3.8B
+;; movi v31.4s, 0x80, lsl 24
+;; and v31.16b, v31.16b, v1.16b
+;; eor v0.16b, v31.16b, v0.16b
;;
(define_expand "@xorsign<mode>3"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index efba783..e1383750 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1891,6 +1891,14 @@
(VNx8SF "vnx8si") (VNx16SF "vnx16si")
])
+;; Mode with floating-point values replaced by 128-bit vector integers.
+(define_mode_attr VQ_INT_EQUIV [(DF "V2DI") (SF "V4SI")
+])
+
+;; Lower case mode with floating-point values replaced by 128-bit vector integers.
+(define_mode_attr vq_int_equiv [(DF "v2di") (SF "v4si")
+])
+
;; Floating-point equivalent of selected modes.
(define_mode_attr V_FP_EQUIV [(VNx8HI "VNx8HF") (VNx8HF "VNx8HF")
(VNx8BF "VNx8HF")
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_3.c b/gcc/testsuite/gcc.target/aarch64/copysign_3.c
new file mode 100644
index 0000000..be48682
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign_3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+float f1 (float x, float y)
+{
+ return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+ return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 2 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign_4.c b/gcc/testsuite/gcc.target/aarch64/copysign_4.c
new file mode 100644
index 0000000..f3cec2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign_4.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8-a+sve" } */
+
+float f1 (float x, float y)
+{
+ return __builtin_copysignf (1.0, x) * __builtin_copysignf (1.0, y);
+}
+
+double f2 (double x, double y)
+{
+ return __builtin_copysign (1.0, x) * __builtin_copysign (1.0, y);
+}
+
+/* { dg-final { scan-assembler-times "movi\t" 1 } } */
+/* { dg-final { scan-assembler-times "mov\tz" 1 } } */
+/* { dg-final { scan-assembler-not "copysign\tw" } } */
+/* { dg-final { scan-assembler-not "dup\tw" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
index 18d10ee..9fe8e9b 100644
--- a/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/fneg-abs_2.c
@@ -9,7 +9,7 @@
/*
** f1:
-** orr v[0-9]+.2s, #?128, lsl #?24
+** orr v[0-9]+.4s, #?128, lsl #?24
** ret
*/
float32_t f1 (float32_t a)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
index fe08fe3..cc97c95 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_2.c
@@ -7,7 +7,7 @@
/*
** f1:
-** orr v0.2s, #?128, lsl #?24
+** orr v0.4s, #?128, lsl #?24
** ret
*/
float32_t f1 (float32_t a)