author    | Tamar Christina <tamar.christina@arm.com> | 2023-11-09 14:05:40 +0000
committer | Tamar Christina <tamar.christina@arm.com> | 2023-11-09 14:18:52 +0000
commit    | e01c2eeb2b654abc82378e204da8327bcdaf05dc (patch)
tree      | 347f7607c8662cbc0354b51d07236a2fa4c464df
parent    | ed2e058c58ab064fe3a26bc4a47a5d0a47350f97 (diff)
download  | gcc-e01c2eeb2b654abc82378e204da8327bcdaf05dc.zip
          | gcc-e01c2eeb2b654abc82378e204da8327bcdaf05dc.tar.gz
          | gcc-e01c2eeb2b654abc82378e204da8327bcdaf05dc.tar.bz2
AArch64: Add SVE implementation for cond_copysign.
This adds an SVE implementation of masked copysign, along with an optimized
pattern for masked copysign (x, -1), which is expanded as a predicated ORR of
the sign bit (see the illustrative C sketch after the ChangeLog).
gcc/ChangeLog:
PR tree-optimization/109154
* config/aarch64/aarch64-sve.md (cond_copysign<mode>): New.
gcc/testsuite/ChangeLog:
PR tree-optimization/109154
* gcc.target/aarch64/sve/fneg-abs_5.c: New test.
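For illustration only (not part of the commit): a scalar C sketch of the kind of
loop the new expander is aimed at. The function names are hypothetical, and the
-fabs form mirrors the included testcase.

#include <math.h>

/* Hypothetical example: each lane selected by the `if' computes
   copysign (a[i], b[i]); unselected lanes keep their previous value.
   When such a loop is vectorized for SVE, the masked operation can map
   onto the new cond_copysign<mode> expander.  */
void
masked_copysign (double *a, double *b, int n)
{
  for (int i = 0; i < n; i++)
    if (b[i] != 0.0)
      a[i] = copysign (a[i], b[i]);
}

/* The special case copysign (x, -1.0) is -fabs (x): only the sign bit
   needs to be set, so the expander emits a single predicated ORR.  */
void
masked_neg_abs (double *a, int n)
{
  for (int i = 0; i < n; i++)
    if (a[i] > 0.0)
      a[i] = -fabs (a[i]);
}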
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md                 | 51
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_5.c | 36
2 files changed, 87 insertions(+), 0 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index cb07c61..cfadac4 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -6429,6 +6429,57 @@
   }
 )
 
+(define_expand "cond_copysign<mode>"
+  [(match_operand:SVE_FULL_F 0 "register_operand")
+   (match_operand:<VPRED> 1 "register_operand")
+   (match_operand:SVE_FULL_F 2 "register_operand")
+   (match_operand:SVE_FULL_F 3 "nonmemory_operand")
+   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+  "TARGET_SVE"
+  {
+    rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
+    rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
+    rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
+    int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+
+    rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
+    rtx arg3 = lowpart_subreg (<V_INT_EQUIV>mode, operands[3], <MODE>mode);
+    rtx arg4 = lowpart_subreg (<V_INT_EQUIV>mode, operands[4], <MODE>mode);
+
+    rtx v_sign_bitmask
+      = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+                                           HOST_WIDE_INT_M1U << bits);
+
+    /* copysign (x, -1) should instead be expanded as orr with the sign
+       bit.  */
+    if (!REG_P (operands[3]))
+      {
+        rtx op2_elt = unwrap_const_vec_duplicate (operands[3]);
+        if (GET_CODE (op2_elt) == CONST_DOUBLE
+            && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
+          {
+            arg3 = force_reg (<V_INT_EQUIV>mode, v_sign_bitmask);
+            emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], arg2,
+                                                  arg3, arg4));
+            emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+            DONE;
+          }
+      }
+
+    operands[2] = force_reg (<MODE>mode, operands[3]);
+    emit_insn (gen_and<v_int_equiv>3 (sign, arg3, v_sign_bitmask));
+    emit_insn (gen_and<v_int_equiv>3
+               (mant, arg2,
+                aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+                                                   ~(HOST_WIDE_INT_M1U
+                                                     << bits))));
+    emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], sign, mant,
+                                          arg4));
+    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+    DONE;
+  }
+)
+
 (define_expand "xorsign<mode>3"
   [(match_operand:SVE_FULL_F 0 "register_operand")
    (match_operand:SVE_FULL_F 1 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_5.c b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_5.c
new file mode 100644
index 0000000..f4ecbee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fneg-abs_5.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
+
+#include <arm_neon.h>
+#include <math.h>
+
+/*
+** f1:
+** ...
+**	orr	z[0-9]+.s, p[0-9]+/m, z[0-9]+.s, z[0-9]+.s
+** ...
+*/
+void f1 (float32_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+    if (a[i] > n)
+      a[i] = -fabsf (a[i]);
+    else
+      a[i] = n;
+}
+
+/*
+** f2:
+** ...
+**	orr	z[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d
+** ...
+*/
+void f2 (float64_t *a, int n)
+{
+  for (int i = 0; i < (n & -8); i++)
+    if (a[i] > n)
+      a[i] = -fabs (a[i]);
+    else
+      a[i] = n;
+}
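As a rough scalar model of the general (register sign operand) path in the
expander above: a sketch only, with hypothetical names, fixed to double
precision.

#include <stdint.h>
#include <string.h>

/* Scalar sketch of the general expansion, per lane:
     sign = bits (sgn) & SIGN_MASK            -- gen_and<v_int_equiv>3
     mant = bits (mag) & ~SIGN_MASK           -- gen_and<v_int_equiv>3
     res  = active ? sign | mant : fallback   -- gen_cond_ior (predicated ORR)
   SIGN_MASK corresponds to HOST_WIDE_INT_M1U << (element bitsize - 1),
   i.e. the sign bit of each element.  */
static double
cond_copysign_model (int active, double mag, double sgn, double fallback)
{
  const uint64_t sign_mask = UINT64_C (1) << 63;
  uint64_t m, s, f;
  memcpy (&m, &mag, sizeof m);
  memcpy (&s, &sgn, sizeof s);
  memcpy (&f, &fallback, sizeof f);
  uint64_t r = active ? ((m & ~sign_mask) | (s & sign_mask)) : f;
  double res;
  memcpy (&res, &r, sizeof res);
  return res;
}

For the copysign (x, -1) fast path the sign operand is already known to be
negative, so the AND steps are unnecessary: a single predicated ORR of each
active lane with the sign-bit mask suffices, which is what the
orr z..., p.../m, ... bodies checked in fneg-abs_5.c correspond to.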