aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AArch64/vecreduce-add.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AArch64/vecreduce-add.ll')
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-add.ll337
1 files changed, 123 insertions, 214 deletions
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 290a473..74d1165 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -1907,11 +1907,8 @@ define i32 @test_udot_v8i8(<8 x i8> %a, <8 x i8> %b) {
;
; CHECK-GI-BASE-LABEL: test_udot_v8i8:
; CHECK-GI-BASE: // %bb.0: // %entry
-; CHECK-GI-BASE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-BASE-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-GI-BASE-NEXT: umull v2.4s, v1.4h, v0.4h
-; CHECK-GI-BASE-NEXT: umlal2 v2.4s, v1.8h, v0.8h
-; CHECK-GI-BASE-NEXT: addv s0, v2.4s
+; CHECK-GI-BASE-NEXT: umull v0.8h, v1.8b, v0.8b
+; CHECK-GI-BASE-NEXT: uaddlv s0, v0.8h
; CHECK-GI-BASE-NEXT: fmov w0, s0
; CHECK-GI-BASE-NEXT: ret
;
@@ -1952,17 +1949,13 @@ define i32 @test_udot_v16i8(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-GI-BASE-LABEL: test_udot_v16i8:
; CHECK-GI-BASE: // %bb.0: // %entry
-; CHECK-GI-BASE-NEXT: ushll v2.8h, v0.8b, #0
-; CHECK-GI-BASE-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-GI-BASE-NEXT: ushll v3.8h, v1.8b, #0
-; CHECK-GI-BASE-NEXT: ushll2 v1.8h, v1.16b, #0
-; CHECK-GI-BASE-NEXT: umull v4.4s, v3.4h, v2.4h
-; CHECK-GI-BASE-NEXT: umull v5.4s, v1.4h, v0.4h
-; CHECK-GI-BASE-NEXT: umlal2 v4.4s, v3.8h, v2.8h
-; CHECK-GI-BASE-NEXT: umlal2 v5.4s, v1.8h, v0.8h
-; CHECK-GI-BASE-NEXT: add v0.4s, v4.4s, v5.4s
-; CHECK-GI-BASE-NEXT: addv s0, v0.4s
-; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: umull v2.8h, v1.8b, v0.8b
+; CHECK-GI-BASE-NEXT: umull2 v0.8h, v1.16b, v0.16b
+; CHECK-GI-BASE-NEXT: uaddlv s1, v2.8h
+; CHECK-GI-BASE-NEXT: uaddlv s0, v0.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s1
+; CHECK-GI-BASE-NEXT: fmov w9, s0
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
;
; CHECK-GI-DOT-LABEL: test_udot_v16i8:
@@ -2018,36 +2011,21 @@ define i32 @test_udot_v24i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-BASE-LABEL: test_udot_v24i8:
; CHECK-GI-BASE: // %bb.0: // %entry
-; CHECK-GI-BASE-NEXT: fmov s0, wzr
-; CHECK-GI-BASE-NEXT: fmov s1, wzr
-; CHECK-GI-BASE-NEXT: ldr q2, [x0]
-; CHECK-GI-BASE-NEXT: ldr d3, [x0, #16]
-; CHECK-GI-BASE-NEXT: ldr q4, [x1]
-; CHECK-GI-BASE-NEXT: ldr d5, [x1, #16]
-; CHECK-GI-BASE-NEXT: ushll v6.8h, v2.8b, #0
-; CHECK-GI-BASE-NEXT: ushll2 v2.8h, v2.16b, #0
-; CHECK-GI-BASE-NEXT: mov v0.s[1], wzr
-; CHECK-GI-BASE-NEXT: mov v1.s[1], wzr
-; CHECK-GI-BASE-NEXT: ushll v3.8h, v3.8b, #0
-; CHECK-GI-BASE-NEXT: ushll v7.8h, v4.8b, #0
-; CHECK-GI-BASE-NEXT: ushll2 v4.8h, v4.16b, #0
-; CHECK-GI-BASE-NEXT: ushll v5.8h, v5.8b, #0
-; CHECK-GI-BASE-NEXT: mov v0.s[2], wzr
-; CHECK-GI-BASE-NEXT: mov v1.s[2], wzr
-; CHECK-GI-BASE-NEXT: umull v16.4s, v7.4h, v6.4h
-; CHECK-GI-BASE-NEXT: umull v17.4s, v4.4h, v2.4h
-; CHECK-GI-BASE-NEXT: umull v18.4s, v5.4h, v3.4h
-; CHECK-GI-BASE-NEXT: mov v0.s[3], wzr
-; CHECK-GI-BASE-NEXT: mov v1.s[3], wzr
-; CHECK-GI-BASE-NEXT: umlal2 v16.4s, v7.8h, v6.8h
-; CHECK-GI-BASE-NEXT: umlal2 v17.4s, v4.8h, v2.8h
-; CHECK-GI-BASE-NEXT: umlal2 v18.4s, v5.8h, v3.8h
-; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-BASE-NEXT: add v1.4s, v16.4s, v17.4s
-; CHECK-GI-BASE-NEXT: add v0.4s, v18.4s, v0.4s
-; CHECK-GI-BASE-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-GI-BASE-NEXT: addv s0, v0.4s
-; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: ldr q0, [x0]
+; CHECK-GI-BASE-NEXT: ldr q1, [x1]
+; CHECK-GI-BASE-NEXT: ldr d2, [x0, #16]
+; CHECK-GI-BASE-NEXT: ldr d3, [x1, #16]
+; CHECK-GI-BASE-NEXT: umull v4.8h, v1.8b, v0.8b
+; CHECK-GI-BASE-NEXT: umull2 v0.8h, v1.16b, v0.16b
+; CHECK-GI-BASE-NEXT: umull v1.8h, v3.8b, v2.8b
+; CHECK-GI-BASE-NEXT: uaddlv s2, v4.8h
+; CHECK-GI-BASE-NEXT: uaddlv s0, v0.8h
+; CHECK-GI-BASE-NEXT: uaddlv s1, v1.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s2
+; CHECK-GI-BASE-NEXT: fmov w9, s0
+; CHECK-GI-BASE-NEXT: add w8, w8, w9
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
;
; CHECK-GI-DOT-LABEL: test_udot_v24i8:
@@ -2118,61 +2096,33 @@ define i32 @test_udot_v48i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-BASE-LABEL: test_udot_v48i8:
; CHECK-GI-BASE: // %bb.0: // %entry
-; CHECK-GI-BASE-NEXT: fmov s0, wzr
-; CHECK-GI-BASE-NEXT: fmov s2, wzr
-; CHECK-GI-BASE-NEXT: ldr q16, [x0, #32]
-; CHECK-GI-BASE-NEXT: fmov s1, wzr
-; CHECK-GI-BASE-NEXT: fmov s3, wzr
-; CHECK-GI-BASE-NEXT: ldr q19, [x1, #32]
-; CHECK-GI-BASE-NEXT: ldp q5, q7, [x1]
-; CHECK-GI-BASE-NEXT: ushll v23.8h, v16.8b, #0
-; CHECK-GI-BASE-NEXT: mov v0.s[1], wzr
-; CHECK-GI-BASE-NEXT: mov v2.s[1], wzr
-; CHECK-GI-BASE-NEXT: ushll v20.8h, v19.8b, #0
-; CHECK-GI-BASE-NEXT: mov v1.s[1], wzr
-; CHECK-GI-BASE-NEXT: mov v3.s[1], wzr
-; CHECK-GI-BASE-NEXT: ushll2 v19.8h, v19.16b, #0
-; CHECK-GI-BASE-NEXT: ldp q18, q17, [x0]
-; CHECK-GI-BASE-NEXT: ushll v4.8h, v5.8b, #0
-; CHECK-GI-BASE-NEXT: ushll2 v5.8h, v5.16b, #0
-; CHECK-GI-BASE-NEXT: ushll v6.8h, v7.8b, #0
-; CHECK-GI-BASE-NEXT: ushll2 v7.8h, v7.16b, #0
-; CHECK-GI-BASE-NEXT: ushll2 v16.8h, v16.16b, #0
-; CHECK-GI-BASE-NEXT: mov v0.s[2], wzr
-; CHECK-GI-BASE-NEXT: mov v2.s[2], wzr
-; CHECK-GI-BASE-NEXT: ushll v21.8h, v18.8b, #0
-; CHECK-GI-BASE-NEXT: ushll2 v18.8h, v18.16b, #0
-; CHECK-GI-BASE-NEXT: ushll v22.8h, v17.8b, #0
-; CHECK-GI-BASE-NEXT: ushll2 v17.8h, v17.16b, #0
-; CHECK-GI-BASE-NEXT: mov v1.s[2], wzr
-; CHECK-GI-BASE-NEXT: mov v3.s[2], wzr
-; CHECK-GI-BASE-NEXT: umull v28.4s, v20.4h, v23.4h
-; CHECK-GI-BASE-NEXT: umull v29.4s, v19.4h, v16.4h
-; CHECK-GI-BASE-NEXT: umull v24.4s, v4.4h, v21.4h
-; CHECK-GI-BASE-NEXT: umull v25.4s, v5.4h, v18.4h
-; CHECK-GI-BASE-NEXT: umull v26.4s, v6.4h, v22.4h
-; CHECK-GI-BASE-NEXT: umull v27.4s, v7.4h, v17.4h
-; CHECK-GI-BASE-NEXT: mov v0.s[3], wzr
-; CHECK-GI-BASE-NEXT: mov v2.s[3], wzr
-; CHECK-GI-BASE-NEXT: mov v1.s[3], wzr
-; CHECK-GI-BASE-NEXT: mov v3.s[3], wzr
-; CHECK-GI-BASE-NEXT: umlal2 v28.4s, v20.8h, v23.8h
-; CHECK-GI-BASE-NEXT: umlal2 v29.4s, v19.8h, v16.8h
-; CHECK-GI-BASE-NEXT: umlal2 v24.4s, v4.8h, v21.8h
-; CHECK-GI-BASE-NEXT: umlal2 v25.4s, v5.8h, v18.8h
-; CHECK-GI-BASE-NEXT: umlal2 v26.4s, v6.8h, v22.8h
-; CHECK-GI-BASE-NEXT: umlal2 v27.4s, v7.8h, v17.8h
-; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v2.4s
-; CHECK-GI-BASE-NEXT: add v1.4s, v1.4s, v3.4s
-; CHECK-GI-BASE-NEXT: add v4.4s, v28.4s, v29.4s
-; CHECK-GI-BASE-NEXT: add v2.4s, v24.4s, v25.4s
-; CHECK-GI-BASE-NEXT: add v3.4s, v26.4s, v27.4s
-; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-BASE-NEXT: add v1.4s, v2.4s, v3.4s
-; CHECK-GI-BASE-NEXT: add v0.4s, v4.4s, v0.4s
-; CHECK-GI-BASE-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-GI-BASE-NEXT: addv s0, v0.4s
-; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: ldp q0, q1, [x0]
+; CHECK-GI-BASE-NEXT: ldr q3, [x0, #32]
+; CHECK-GI-BASE-NEXT: ldp q2, q4, [x1]
+; CHECK-GI-BASE-NEXT: ldr q5, [x1, #32]
+; CHECK-GI-BASE-NEXT: umull v7.8h, v5.8b, v3.8b
+; CHECK-GI-BASE-NEXT: umull2 v3.8h, v5.16b, v3.16b
+; CHECK-GI-BASE-NEXT: umull v6.8h, v2.8b, v0.8b
+; CHECK-GI-BASE-NEXT: umull2 v0.8h, v2.16b, v0.16b
+; CHECK-GI-BASE-NEXT: umull2 v2.8h, v4.16b, v1.16b
+; CHECK-GI-BASE-NEXT: umull v1.8h, v4.8b, v1.8b
+; CHECK-GI-BASE-NEXT: uaddlv s5, v7.8h
+; CHECK-GI-BASE-NEXT: uaddlv s3, v3.8h
+; CHECK-GI-BASE-NEXT: uaddlv s4, v6.8h
+; CHECK-GI-BASE-NEXT: uaddlv s0, v0.8h
+; CHECK-GI-BASE-NEXT: uaddlv s2, v2.8h
+; CHECK-GI-BASE-NEXT: uaddlv s1, v1.8h
+; CHECK-GI-BASE-NEXT: fmov w11, s5
+; CHECK-GI-BASE-NEXT: fmov w8, s4
+; CHECK-GI-BASE-NEXT: fmov w9, s0
+; CHECK-GI-BASE-NEXT: fmov w10, s2
+; CHECK-GI-BASE-NEXT: add w8, w8, w9
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w10, w10, w11
+; CHECK-GI-BASE-NEXT: fmov w11, s3
+; CHECK-GI-BASE-NEXT: add w8, w8, w9
+; CHECK-GI-BASE-NEXT: add w9, w10, w11
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
;
; CHECK-GI-DOT-LABEL: test_udot_v48i8:
@@ -2225,11 +2175,8 @@ define i32 @test_sdot_v8i8(<8 x i8> %a, <8 x i8> %b) {
;
; CHECK-GI-BASE-LABEL: test_sdot_v8i8:
; CHECK-GI-BASE: // %bb.0: // %entry
-; CHECK-GI-BASE-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-BASE-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-GI-BASE-NEXT: smull v2.4s, v1.4h, v0.4h
-; CHECK-GI-BASE-NEXT: smlal2 v2.4s, v1.8h, v0.8h
-; CHECK-GI-BASE-NEXT: addv s0, v2.4s
+; CHECK-GI-BASE-NEXT: smull v0.8h, v1.8b, v0.8b
+; CHECK-GI-BASE-NEXT: saddlv s0, v0.8h
; CHECK-GI-BASE-NEXT: fmov w0, s0
; CHECK-GI-BASE-NEXT: ret
;
@@ -2270,17 +2217,13 @@ define i32 @test_sdot_v16i8(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-GI-BASE-LABEL: test_sdot_v16i8:
; CHECK-GI-BASE: // %bb.0: // %entry
-; CHECK-GI-BASE-NEXT: sshll v2.8h, v0.8b, #0
-; CHECK-GI-BASE-NEXT: sshll2 v0.8h, v0.16b, #0
-; CHECK-GI-BASE-NEXT: sshll v3.8h, v1.8b, #0
-; CHECK-GI-BASE-NEXT: sshll2 v1.8h, v1.16b, #0
-; CHECK-GI-BASE-NEXT: smull v4.4s, v3.4h, v2.4h
-; CHECK-GI-BASE-NEXT: smull v5.4s, v1.4h, v0.4h
-; CHECK-GI-BASE-NEXT: smlal2 v4.4s, v3.8h, v2.8h
-; CHECK-GI-BASE-NEXT: smlal2 v5.4s, v1.8h, v0.8h
-; CHECK-GI-BASE-NEXT: add v0.4s, v4.4s, v5.4s
-; CHECK-GI-BASE-NEXT: addv s0, v0.4s
-; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: smull v2.8h, v1.8b, v0.8b
+; CHECK-GI-BASE-NEXT: smull2 v0.8h, v1.16b, v0.16b
+; CHECK-GI-BASE-NEXT: saddlv s1, v2.8h
+; CHECK-GI-BASE-NEXT: saddlv s0, v0.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s1
+; CHECK-GI-BASE-NEXT: fmov w9, s0
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
;
; CHECK-GI-DOT-LABEL: test_sdot_v16i8:
@@ -2336,36 +2279,21 @@ define i32 @test_sdot_v24i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-BASE-LABEL: test_sdot_v24i8:
; CHECK-GI-BASE: // %bb.0: // %entry
-; CHECK-GI-BASE-NEXT: fmov s0, wzr
-; CHECK-GI-BASE-NEXT: fmov s1, wzr
-; CHECK-GI-BASE-NEXT: ldr q2, [x0]
-; CHECK-GI-BASE-NEXT: ldr d3, [x0, #16]
-; CHECK-GI-BASE-NEXT: ldr q4, [x1]
-; CHECK-GI-BASE-NEXT: ldr d5, [x1, #16]
-; CHECK-GI-BASE-NEXT: sshll v6.8h, v2.8b, #0
-; CHECK-GI-BASE-NEXT: sshll2 v2.8h, v2.16b, #0
-; CHECK-GI-BASE-NEXT: mov v0.s[1], wzr
-; CHECK-GI-BASE-NEXT: mov v1.s[1], wzr
-; CHECK-GI-BASE-NEXT: sshll v3.8h, v3.8b, #0
-; CHECK-GI-BASE-NEXT: sshll v7.8h, v4.8b, #0
-; CHECK-GI-BASE-NEXT: sshll2 v4.8h, v4.16b, #0
-; CHECK-GI-BASE-NEXT: sshll v5.8h, v5.8b, #0
-; CHECK-GI-BASE-NEXT: mov v0.s[2], wzr
-; CHECK-GI-BASE-NEXT: mov v1.s[2], wzr
-; CHECK-GI-BASE-NEXT: smull v16.4s, v7.4h, v6.4h
-; CHECK-GI-BASE-NEXT: smull v17.4s, v4.4h, v2.4h
-; CHECK-GI-BASE-NEXT: smull v18.4s, v5.4h, v3.4h
-; CHECK-GI-BASE-NEXT: mov v0.s[3], wzr
-; CHECK-GI-BASE-NEXT: mov v1.s[3], wzr
-; CHECK-GI-BASE-NEXT: smlal2 v16.4s, v7.8h, v6.8h
-; CHECK-GI-BASE-NEXT: smlal2 v17.4s, v4.8h, v2.8h
-; CHECK-GI-BASE-NEXT: smlal2 v18.4s, v5.8h, v3.8h
-; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-BASE-NEXT: add v1.4s, v16.4s, v17.4s
-; CHECK-GI-BASE-NEXT: add v0.4s, v18.4s, v0.4s
-; CHECK-GI-BASE-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-GI-BASE-NEXT: addv s0, v0.4s
-; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: ldr q0, [x0]
+; CHECK-GI-BASE-NEXT: ldr q1, [x1]
+; CHECK-GI-BASE-NEXT: ldr d2, [x0, #16]
+; CHECK-GI-BASE-NEXT: ldr d3, [x1, #16]
+; CHECK-GI-BASE-NEXT: smull v4.8h, v1.8b, v0.8b
+; CHECK-GI-BASE-NEXT: smull2 v0.8h, v1.16b, v0.16b
+; CHECK-GI-BASE-NEXT: smull v1.8h, v3.8b, v2.8b
+; CHECK-GI-BASE-NEXT: saddlv s2, v4.8h
+; CHECK-GI-BASE-NEXT: saddlv s0, v0.8h
+; CHECK-GI-BASE-NEXT: saddlv s1, v1.8h
+; CHECK-GI-BASE-NEXT: fmov w8, s2
+; CHECK-GI-BASE-NEXT: fmov w9, s0
+; CHECK-GI-BASE-NEXT: add w8, w8, w9
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
;
; CHECK-GI-DOT-LABEL: test_sdot_v24i8:
@@ -2436,61 +2364,33 @@ define i32 @test_sdot_v48i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-BASE-LABEL: test_sdot_v48i8:
; CHECK-GI-BASE: // %bb.0: // %entry
-; CHECK-GI-BASE-NEXT: fmov s0, wzr
-; CHECK-GI-BASE-NEXT: fmov s2, wzr
-; CHECK-GI-BASE-NEXT: ldr q16, [x0, #32]
-; CHECK-GI-BASE-NEXT: fmov s1, wzr
-; CHECK-GI-BASE-NEXT: fmov s3, wzr
-; CHECK-GI-BASE-NEXT: ldr q19, [x1, #32]
-; CHECK-GI-BASE-NEXT: ldp q5, q7, [x1]
-; CHECK-GI-BASE-NEXT: sshll v23.8h, v16.8b, #0
-; CHECK-GI-BASE-NEXT: mov v0.s[1], wzr
-; CHECK-GI-BASE-NEXT: mov v2.s[1], wzr
-; CHECK-GI-BASE-NEXT: sshll v20.8h, v19.8b, #0
-; CHECK-GI-BASE-NEXT: mov v1.s[1], wzr
-; CHECK-GI-BASE-NEXT: mov v3.s[1], wzr
-; CHECK-GI-BASE-NEXT: sshll2 v19.8h, v19.16b, #0
-; CHECK-GI-BASE-NEXT: ldp q18, q17, [x0]
-; CHECK-GI-BASE-NEXT: sshll v4.8h, v5.8b, #0
-; CHECK-GI-BASE-NEXT: sshll2 v5.8h, v5.16b, #0
-; CHECK-GI-BASE-NEXT: sshll v6.8h, v7.8b, #0
-; CHECK-GI-BASE-NEXT: sshll2 v7.8h, v7.16b, #0
-; CHECK-GI-BASE-NEXT: sshll2 v16.8h, v16.16b, #0
-; CHECK-GI-BASE-NEXT: mov v0.s[2], wzr
-; CHECK-GI-BASE-NEXT: mov v2.s[2], wzr
-; CHECK-GI-BASE-NEXT: sshll v21.8h, v18.8b, #0
-; CHECK-GI-BASE-NEXT: sshll2 v18.8h, v18.16b, #0
-; CHECK-GI-BASE-NEXT: sshll v22.8h, v17.8b, #0
-; CHECK-GI-BASE-NEXT: sshll2 v17.8h, v17.16b, #0
-; CHECK-GI-BASE-NEXT: mov v1.s[2], wzr
-; CHECK-GI-BASE-NEXT: mov v3.s[2], wzr
-; CHECK-GI-BASE-NEXT: smull v28.4s, v20.4h, v23.4h
-; CHECK-GI-BASE-NEXT: smull v29.4s, v19.4h, v16.4h
-; CHECK-GI-BASE-NEXT: smull v24.4s, v4.4h, v21.4h
-; CHECK-GI-BASE-NEXT: smull v25.4s, v5.4h, v18.4h
-; CHECK-GI-BASE-NEXT: smull v26.4s, v6.4h, v22.4h
-; CHECK-GI-BASE-NEXT: smull v27.4s, v7.4h, v17.4h
-; CHECK-GI-BASE-NEXT: mov v0.s[3], wzr
-; CHECK-GI-BASE-NEXT: mov v2.s[3], wzr
-; CHECK-GI-BASE-NEXT: mov v1.s[3], wzr
-; CHECK-GI-BASE-NEXT: mov v3.s[3], wzr
-; CHECK-GI-BASE-NEXT: smlal2 v28.4s, v20.8h, v23.8h
-; CHECK-GI-BASE-NEXT: smlal2 v29.4s, v19.8h, v16.8h
-; CHECK-GI-BASE-NEXT: smlal2 v24.4s, v4.8h, v21.8h
-; CHECK-GI-BASE-NEXT: smlal2 v25.4s, v5.8h, v18.8h
-; CHECK-GI-BASE-NEXT: smlal2 v26.4s, v6.8h, v22.8h
-; CHECK-GI-BASE-NEXT: smlal2 v27.4s, v7.8h, v17.8h
-; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v2.4s
-; CHECK-GI-BASE-NEXT: add v1.4s, v1.4s, v3.4s
-; CHECK-GI-BASE-NEXT: add v4.4s, v28.4s, v29.4s
-; CHECK-GI-BASE-NEXT: add v2.4s, v24.4s, v25.4s
-; CHECK-GI-BASE-NEXT: add v3.4s, v26.4s, v27.4s
-; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-BASE-NEXT: add v1.4s, v2.4s, v3.4s
-; CHECK-GI-BASE-NEXT: add v0.4s, v4.4s, v0.4s
-; CHECK-GI-BASE-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-GI-BASE-NEXT: addv s0, v0.4s
-; CHECK-GI-BASE-NEXT: fmov w0, s0
+; CHECK-GI-BASE-NEXT: ldp q0, q1, [x0]
+; CHECK-GI-BASE-NEXT: ldr q3, [x0, #32]
+; CHECK-GI-BASE-NEXT: ldp q2, q4, [x1]
+; CHECK-GI-BASE-NEXT: ldr q5, [x1, #32]
+; CHECK-GI-BASE-NEXT: smull v7.8h, v5.8b, v3.8b
+; CHECK-GI-BASE-NEXT: smull2 v3.8h, v5.16b, v3.16b
+; CHECK-GI-BASE-NEXT: smull v6.8h, v2.8b, v0.8b
+; CHECK-GI-BASE-NEXT: smull2 v0.8h, v2.16b, v0.16b
+; CHECK-GI-BASE-NEXT: smull2 v2.8h, v4.16b, v1.16b
+; CHECK-GI-BASE-NEXT: smull v1.8h, v4.8b, v1.8b
+; CHECK-GI-BASE-NEXT: saddlv s5, v7.8h
+; CHECK-GI-BASE-NEXT: saddlv s3, v3.8h
+; CHECK-GI-BASE-NEXT: saddlv s4, v6.8h
+; CHECK-GI-BASE-NEXT: saddlv s0, v0.8h
+; CHECK-GI-BASE-NEXT: saddlv s2, v2.8h
+; CHECK-GI-BASE-NEXT: saddlv s1, v1.8h
+; CHECK-GI-BASE-NEXT: fmov w11, s5
+; CHECK-GI-BASE-NEXT: fmov w8, s4
+; CHECK-GI-BASE-NEXT: fmov w9, s0
+; CHECK-GI-BASE-NEXT: fmov w10, s2
+; CHECK-GI-BASE-NEXT: add w8, w8, w9
+; CHECK-GI-BASE-NEXT: fmov w9, s1
+; CHECK-GI-BASE-NEXT: add w10, w10, w11
+; CHECK-GI-BASE-NEXT: fmov w11, s3
+; CHECK-GI-BASE-NEXT: add w8, w8, w9
+; CHECK-GI-BASE-NEXT: add w9, w10, w11
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
; CHECK-GI-BASE-NEXT: ret
;
; CHECK-GI-DOT-LABEL: test_sdot_v48i8:
@@ -2549,18 +2449,27 @@ define i32 @test_udot_v8i8_multi_use(<8 x i8> %a, <8 x i8> %b) {
; CHECK-SD-DOT-NEXT: add w0, w8, w9
; CHECK-SD-DOT-NEXT: ret
;
-; CHECK-GI-LABEL: test_udot_v8i8_multi_use:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: umull v2.4s, v1.4h, v0.4h
-; CHECK-GI-NEXT: mov v3.16b, v2.16b
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: umlal2 v3.4s, v1.8h, v0.8h
-; CHECK-GI-NEXT: addv s0, v3.4s
-; CHECK-GI-NEXT: fmov w9, s0
-; CHECK-GI-NEXT: add w0, w9, w8
-; CHECK-GI-NEXT: ret
+; CHECK-GI-BASE-LABEL: test_udot_v8i8_multi_use:
+; CHECK-GI-BASE: // %bb.0: // %entry
+; CHECK-GI-BASE-NEXT: umull v0.8h, v1.8b, v0.8b
+; CHECK-GI-BASE-NEXT: uaddlv s1, v0.8h
+; CHECK-GI-BASE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-BASE-NEXT: fmov w9, s0
+; CHECK-GI-BASE-NEXT: fmov w8, s1
+; CHECK-GI-BASE-NEXT: add w0, w8, w9
+; CHECK-GI-BASE-NEXT: ret
+;
+; CHECK-GI-DOT-LABEL: test_udot_v8i8_multi_use:
+; CHECK-GI-DOT: // %bb.0: // %entry
+; CHECK-GI-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-DOT-NEXT: umull v3.8h, v1.8b, v0.8b
+; CHECK-GI-DOT-NEXT: udot v2.2s, v1.8b, v0.8b
+; CHECK-GI-DOT-NEXT: ushll v0.4s, v3.4h, #0
+; CHECK-GI-DOT-NEXT: fmov w9, s0
+; CHECK-GI-DOT-NEXT: addp v1.2s, v2.2s, v2.2s
+; CHECK-GI-DOT-NEXT: fmov w8, s1
+; CHECK-GI-DOT-NEXT: add w0, w8, w9
+; CHECK-GI-DOT-NEXT: ret
entry:
%0 = zext <8 x i8> %a to <8 x i32>
%1 = zext <8 x i8> %b to <8 x i32>