aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorTamar Christina <tamar.christina@arm.com>2020-12-16 20:43:47 +0000
committerTamar Christina <tamar.christina@arm.com>2020-12-16 20:43:54 +0000
commit84747acf8da36425f7e36cf99b251ee047b2e3a5 (patch)
tree9df7e178db23e4f9015bca549c456747f7e13563 /gcc
parentcdb1c276560b26a9c3bc837340669962ef1b430a (diff)
downloadgcc-84747acf8da36425f7e36cf99b251ee047b2e3a5.zip
gcc-84747acf8da36425f7e36cf99b251ee047b2e3a5.tar.gz
gcc-84747acf8da36425f7e36cf99b251ee047b2e3a5.tar.bz2
AArch64: Add NEON, SVE and SVE2 RTL patterns for Complex Addition.
This adds implementation for the optabs for add complex operations. With this the following C code: void f90 (float complex a[restrict N], float complex b[restrict N], float complex c[restrict N]) { for (int i=0; i < N; i++) c[i] = a[i] + (b[i] * I); } generates f90: mov x3, 0 .p2align 3,,7 .L2: ldr q0, [x0, x3] ldr q1, [x1, x3] fcadd v0.4s, v0.4s, v1.4s, #90 str q0, [x2, x3] add x3, x3, 16 cmp x3, 1600 bne .L2 ret instead of f90: add x3, x1, 1600 .p2align 3,,7 .L2: ld2 {v4.4s - v5.4s}, [x0], 32 ld2 {v2.4s - v3.4s}, [x1], 32 fsub v0.4s, v4.4s, v3.4s fadd v1.4s, v5.4s, v2.4s st2 {v0.4s - v1.4s}, [x2], 32 cmp x3, x1 bne .L2 ret gcc/ChangeLog: * config/aarch64/aarch64-simd.md (cadd<rot><mode>3): New. * config/aarch64/iterators.md (SVE2_INT_CADD_OP): New. * config/aarch64/aarch64-sve.md (cadd<rot><mode>3): New. * config/aarch64/aarch64-sve2.md (cadd<rot><mode>3): New.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/aarch64/aarch64-simd.md8
-rw-r--r--gcc/config/aarch64/aarch64-sve.md14
-rw-r--r--gcc/config/aarch64/aarch64-sve2.md10
-rw-r--r--gcc/config/aarch64/iterators.md4
4 files changed, 36 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 68baf41..05d18f8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -449,6 +449,14 @@
[(set_attr "type" "neon_fcadd")]
)
+(define_expand "cadd<rot><mode>3"
+ [(set (match_operand:VHSDF 0 "register_operand")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
+ (match_operand:VHSDF 2 "register_operand")]
+ FCADD))]
+ "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
+)
+
(define_insn "aarch64_fcmla<rot><mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 6359c40..6a5194f 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -5480,6 +5480,20 @@
"TARGET_SVE"
)
+;; Predicated FCADD using ptrue for unpredicated optab for auto-vectorizer
+(define_expand "@cadd<rot><mode>3"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand")
+ (unspec:SVE_FULL_F
+ [(match_dup 3)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_FULL_F 1 "register_operand")
+ (match_operand:SVE_FULL_F 2 "register_operand")]
+ SVE_COND_FCADD))]
+ "TARGET_SVE"
+{
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+})
+
;; Predicated FCADD, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
[(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 772c350..1897ddf 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1799,6 +1799,16 @@
[(set_attr "movprfx" "*,yes")]
)
+;; unpredicated optab pattern for auto-vectorizer
+(define_expand "cadd<rot><mode>3"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_operand:SVE_FULL_I 1 "register_operand")
+ (match_operand:SVE_FULL_I 2 "register_operand")]
+ SVE2_INT_CADD_OP))]
+ "TARGET_SVE2"
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT] Complex ternary operations
;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index fb1426b..b8ee422 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2598,6 +2598,10 @@
UNSPEC_SQRDCMLAH180
UNSPEC_SQRDCMLAH270])
+;; Same as SVE2_INT_CADD but exclude the saturating instructions
+(define_int_iterator SVE2_INT_CADD_OP [UNSPEC_CADD90
+ UNSPEC_CADD270])
+
(define_int_iterator SVE2_INT_CDOT [UNSPEC_CDOT
UNSPEC_CDOT90
UNSPEC_CDOT180