diff options
-rw-r--r-- | llvm/test/CodeGen/AArch64/zext-shuffle.ll | 561 |
1 files changed, 561 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AArch64/zext-shuffle.ll b/llvm/test/CodeGen/AArch64/zext-shuffle.ll
new file mode 100644
index 0000000..4ef8daf
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zext-shuffle.ll
@@ -0,0 +1,561 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-eabi -o - %s | FileCheck %s
+; Tests codegen of zext/uitofp applied to shufflevector lane selections. First group: i32/i16 lanes widened to i64.
+define <2 x i64> @v2i64_02(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: v2i64_02:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: zip1 v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 0, i32 2> ; lanes 0 and 2 of %a; %b is not referenced
+  %d = zext <2 x i32> %c to <2 x i64>
+  ret <2 x i64> %d
+}
+
+define <2 x i64> @v2i64_13(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: v2i64_13:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: zip2 v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 1, i32 3> ; lanes 1 and 3 of %a; %b is not referenced
+  %d = zext <2 x i32> %c to <2 x i64>
+  ret <2 x i64> %d
+}
+
+define <2 x i64> @v2i64_04812(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: v2i64_04812:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip1 v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 0, i32 4> ; lane 0 of %a and lane 0 of %b (mask index 4)
+  %d = zext <2 x i32> %c to <2 x i64> ; NOTE(review): name reads like a 0,4,8,12 mask, but the mask is <0, 4> - confirm naming
+  ret <2 x i64> %d
+}
+
+define <2 x i64> @v2i64_15913(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: v2i64_15913:
+; CHECK: // %bb.0:
+; CHECK-NEXT: zip2 v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 1, i32 5> ; lane 1 of %a and lane 1 of %b (mask index 5)
+  %d = zext <2 x i32> %c to <2 x i64>
+  ret <2 x i64> %d
+}
+
+define <2 x i64> @v2i64_261014(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: v2i64_261014:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: zip1 v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 2, i32 6> ; lane 2 of %a and lane 2 of %b (mask index 6)
+  %d = zext <2 x i32> %c to <2 x i64>
+  ret <2 x i64> %d
+}
+
+define <2 x i64> @v2i64_37(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: v2i64_37:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: zip2 v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 3, i32 7> ; lane 3 of %a and lane 3 of %b (mask index 7)
+  %d = zext <2 x i32> %c to <2 x i64>
+  ret <2 x i64> %d
+}
+
+define <4 x i64> @v2i64_i16_04812(<16 x i16> %a) {
+; CHECK-LABEL: v2i64_i16_04812:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI6_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ret
+  %s1 = shufflevector <16 x i16> %a, <16 x i16> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12> ; second operand is undef: all mask indices are < 16
+  %z1 = zext <4 x i16> %s1 to <4 x i64> ; every 4th i16 lane of %a starting at lane 0, widened i16 -> i64
+  ret <4 x i64> %z1 ; NOTE(review): result is <4 x i64> although the name says v2i64 - confirm naming
+}
+
+define <4 x i64> @v2i64_i16_15913(<16 x i16> %a) {
+; CHECK-LABEL: v2i64_i16_15913:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI7_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ret
+  %s1 = shufflevector <16 x i16> %a, <16 x i16> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+  %z1 = zext <4 x i16> %s1 to <4 x i64> ; every 4th i16 lane of %a starting at lane 1, widened i16 -> i64
+  ret <4 x i64> %z1
+}
+
+define <4 x i64> @v2i64_i16_261014(<16 x i16> %a) {
+; CHECK-LABEL: v2i64_i16_261014:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI8_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ret
+  %s1 = shufflevector <16 x i16> %a, <16 x i16> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+  %z1 = zext <4 x i16> %s1 to <4 x i64> ; every 4th i16 lane of %a starting at lane 2, widened i16 -> i64
+  ret <4 x i64> %z1
+}
+
+define <4 x i64> @v2i64_i16_371115(<16 x i16> %a) {
+; CHECK-LABEL: v2i64_i16_371115:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI9_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ret
+  %s1 = shufflevector <16 x i16> %a, <16 x i16> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+  %z1 = zext <4 x i16> %s1 to <4 x i64> ; every 4th i16 lane of %a starting at lane 3, widened i16 -> i64
+  ret <4 x i64> %z1
+}
+
+; Second group: <8 x i16> lane selections zero-extended to <4 x i32>.
+define <4 x i32> @v4i32_0246(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: v4i32_0246:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even lanes of %a; %b is not referenced
+  %d = zext <4 x i16> %c to <4 x i32> ; the assertions above expect this pair to become a single AND with a constant mask
+  ret <4 x i32> %d
+}
+
+define <4 x i32> @v4i32_1357(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: v4i32_1357:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp2 v0.8h, v0.8h, v0.8h
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd lanes of %a; %b is not referenced
+  %d = zext <4 x i16> %c to <4 x i32>
+  ret <4 x i32> %d
+}
+
+define <4 x i32> @v4i32_04812(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: v4i32_04812:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI12_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 0, i32 4, i32 8, i32 12> ; lanes 0,4 of %a then lanes 0,4 of %b
+  %d = zext <4 x i16> %c to <4 x i32>
+  ret <4 x i32> %d
+}
+
+define <4 x i32> @v4i32_15913(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: v4i32_15913:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI13_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 1, i32 5, i32 9, i32 13> ; lanes 1,5 of %a then lanes 1,5 of %b
+  %d = zext <4 x i16> %c to <4 x i32>
+  ret <4 x i32> %d
+}
+
+define <4 x i32> @v4i32_261014(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: v4i32_261014:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI14_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 2, i32 6, i32 10, i32 14> ; lanes 2,6 of %a then lanes 2,6 of %b
+  %d = zext <4 x i16> %c to <4 x i32>
+  ret <4 x i32> %d
+}
+
+define <4 x i32> @v4i32_371115(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: v4i32_371115:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI15_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 3, i32 7, i32 11, i32 15> ; lanes 3,7 of %a then lanes 3,7 of %b
+  %d = zext <4 x i16> %c to <4 x i32>
+  ret <4 x i32> %d
+}
+
+; Third group: <16 x i8> lane selections zero-extended to <8 x i16>.
+define <8 x i16> @v8i16_0246(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: v8i16_0246:
+; CHECK: // %bb.0:
+; CHECK-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-NEXT: ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %d = zext <8 x i8> %c to <8 x i16> ; even bytes of %a (%b is not referenced); the assertions above expect a single BIC
+  ret <8 x i16> %d
+}
+
+define <8 x i16> @v8i16_1357(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: v8i16_1357:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uzp2 v0.16b, v0.16b, v0.16b
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %d = zext <8 x i8> %c to <8 x i16> ; odd bytes of %a (%b is not referenced)
+  ret <8 x i16> %d
+}
+
+define <8 x i16> @v8i16_04812(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: v8i16_04812:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI18_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+  %d = zext <8 x i8> %c to <8 x i16> ; bytes 0,4,8,12 of %a then bytes 0,4,8,12 of %b
+  ret <8 x i16> %d
+}
+
+define <8 x i16> @v8i16_15913(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: v8i16_15913:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI19_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI19_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+  %d = zext <8 x i8> %c to <8 x i16> ; bytes 1,5,9,13 of %a then bytes 1,5,9,13 of %b
+  ret <8 x i16> %d
+}
+
+define <8 x i16> @v8i16_261014(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: v8i16_261014:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI20_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+  %d = zext <8 x i8> %c to <8 x i16> ; bytes 2,6,10,14 of %a then bytes 2,6,10,14 of %b
+  ret <8 x i16> %d
+}
+
+define <8 x i16> @v8i16_371115(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: v8i16_371115:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI21_0
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+  %d = zext <8 x i8> %c to <8 x i16> ; bytes 3,7,11,15 of %a then bytes 3,7,11,15 of %b
+  ret <8 x i16> %d
+}
+
+; Fourth group: four stride-4 selections of one <32 x i16> value, each widened to 64-bit lanes and then summed.
+define <8 x i64> @zext_add(<32 x i16> %l) {
+; CHECK-LABEL: zext_add:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI22_0
+; CHECK-NEXT: adrp x9, .LCPI22_3
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI22_0]
+; CHECK-NEXT: adrp x8, .LCPI22_1
+; CHECK-NEXT: ldr q7, [x9, :lo12:.LCPI22_3]
+; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI22_1]
+; CHECK-NEXT: adrp x8, .LCPI22_2
+; CHECK-NEXT: adrp x9, .LCPI22_7
+; CHECK-NEXT: ldr q6, [x8, :lo12:.LCPI22_2]
+; CHECK-NEXT: adrp x8, .LCPI22_4
+; CHECK-NEXT: ldr q18, [x9, :lo12:.LCPI22_7]
+; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI22_4]
+; CHECK-NEXT: adrp x8, .LCPI22_5
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v5.16b, { v0.16b, v1.16b }, v5.16b
+; CHECK-NEXT: ldr q17, [x8, :lo12:.LCPI22_5]
+; CHECK-NEXT: adrp x8, .LCPI22_6
+; CHECK-NEXT: tbl v7.16b, { v0.16b, v1.16b }, v7.16b
+; CHECK-NEXT: ldr q19, [x8, :lo12:.LCPI22_6]
+; CHECK-NEXT: tbl v17.16b, { v0.16b, v1.16b }, v17.16b
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v18.16b
+; CHECK-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v4.16b
+; CHECK-NEXT: tbl v4.16b, { v2.16b, v3.16b }, v6.16b
+; CHECK-NEXT: tbl v6.16b, { v2.16b, v3.16b }, v16.16b
+; CHECK-NEXT: tbl v2.16b, { v2.16b, v3.16b }, v19.16b
+; CHECK-NEXT: uaddl v5.4s, v5.4h, v7.4h
+; CHECK-NEXT: uaddl v7.4s, v17.4h, v0.4h
+; CHECK-NEXT: uaddl2 v4.4s, v1.8h, v4.8h
+; CHECK-NEXT: uaddl2 v2.4s, v6.8h, v2.8h
+; CHECK-NEXT: uaddl v0.2d, v5.2s, v7.2s
+; CHECK-NEXT: uaddl2 v1.2d, v5.4s, v7.4s
+; CHECK-NEXT: uaddl2 v3.2d, v4.4s, v2.4s
+; CHECK-NEXT: uaddl v2.2d, v4.2s, v2.2s
+; CHECK-NEXT: ret
+  %s1 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+  %z1 = zext <8 x i16> %s1 to <8 x i64> ; every 4th lane of %l starting at lane 0
+  %s2 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+  %z2 = zext <8 x i16> %s2 to <8 x i64> ; every 4th lane of %l starting at lane 1
+  %s3 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+  %z3 = zext <8 x i16> %s3 to <8 x i64> ; every 4th lane of %l starting at lane 2
+  %s4 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+  %z4 = zext <8 x i16> %s4 to <8 x i64> ; every 4th lane of %l starting at lane 3
+  %a = add <8 x i64> %z1, %z2
+  %b = add <8 x i64> %z3, %z4
+  %c = add <8 x i64> %a, %b ; (z1 + z2) + (z3 + z4)
+  ret <8 x i64> %c
+}
+
+define <8 x i64> @zext_load_add(ptr %p) {
+; CHECK-LABEL: zext_load_add:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x0]
+; CHECK-NEXT: uaddl v4.4s, v0.4h, v1.4h
+; CHECK-NEXT: uaddl v5.4s, v2.4h, v3.4h
+; CHECK-NEXT: uaddl2 v6.4s, v0.8h, v1.8h
+; CHECK-NEXT: uaddl2 v2.4s, v2.8h, v3.8h
+; CHECK-NEXT: uaddl v0.2d, v4.2s, v5.2s
+; CHECK-NEXT: uaddl2 v1.2d, v4.4s, v5.4s
+; CHECK-NEXT: uaddl2 v3.2d, v6.4s, v2.4s
+; CHECK-NEXT: uaddl v2.2d, v6.2s, v2.2s
+; CHECK-NEXT: ret
+  %l = load <32 x i16>, ptr %p ; source vector comes from memory; the assertions above expect one ld4 and no tbl
+  %s1 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+  %z1 = zext <8 x i16> %s1 to <8 x i64> ; every 4th lane of %l starting at lane 0
+  %s2 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+  %z2 = zext <8 x i16> %s2 to <8 x i64> ; every 4th lane of %l starting at lane 1
+  %s3 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+  %z3 = zext <8 x i16> %s3 to <8 x i64> ; every 4th lane of %l starting at lane 2
+  %s4 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+  %z4 = zext <8 x i16> %s4 to <8 x i64> ; every 4th lane of %l starting at lane 3
+  %a = add <8 x i64> %z1, %z2
+  %b = add <8 x i64> %z3, %z4
+  %c = add <8 x i64> %a, %b ; (z1 + z2) + (z3 + z4)
+  ret <8 x i64> %c
+}
+
+define <8 x double> @uitofp_fadd(<32 x i16> %l) {
+; CHECK-LABEL: uitofp_fadd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI24_0
+; CHECK-NEXT: adrp x9, .LCPI24_1
+; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: adrp x10, .LCPI24_6
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI24_0]
+; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI24_1]
+; CHECK-NEXT: adrp x8, .LCPI24_2
+; CHECK-NEXT: adrp x9, .LCPI24_3
+; CHECK-NEXT: ldr q6, [x8, :lo12:.LCPI24_2]
+; CHECK-NEXT: adrp x8, .LCPI24_4
+; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT: tbl v4.16b, { v0.16b, v1.16b }, v4.16b
+; CHECK-NEXT: tbl v5.16b, { v2.16b, v3.16b }, v5.16b
+; CHECK-NEXT: ldr q7, [x9, :lo12:.LCPI24_3]
+; CHECK-NEXT: adrp x9, .LCPI24_5
+; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI24_4]
+; CHECK-NEXT: adrp x8, .LCPI24_7
+; CHECK-NEXT: ldr q17, [x9, :lo12:.LCPI24_5]
+; CHECK-NEXT: ldr q18, [x10, :lo12:.LCPI24_6]
+; CHECK-NEXT: ldr q19, [x8, :lo12:.LCPI24_7]
+; CHECK-NEXT: tbl v6.16b, { v0.16b, v1.16b }, v6.16b
+; CHECK-NEXT: tbl v7.16b, { v2.16b, v3.16b }, v7.16b
+; CHECK-NEXT: tbl v16.16b, { v0.16b, v1.16b }, v16.16b
+; CHECK-NEXT: tbl v17.16b, { v2.16b, v3.16b }, v17.16b
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v18.16b
+; CHECK-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v19.16b
+; CHECK-NEXT: ushll2 v5.4s, v5.8h, #0
+; CHECK-NEXT: ushll v4.4s, v4.4h, #0
+; CHECK-NEXT: ushll2 v7.4s, v7.8h, #0
+; CHECK-NEXT: ushll v6.4s, v6.4h, #0
+; CHECK-NEXT: ushll v16.4s, v16.4h, #0
+; CHECK-NEXT: ushll2 v20.2d, v5.4s, #0
+; CHECK-NEXT: ushll2 v21.2d, v4.4s, #0
+; CHECK-NEXT: ushll2 v17.4s, v17.8h, #0
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-NEXT: ushll v2.2d, v5.2s, #0
+; CHECK-NEXT: ushll v3.2d, v4.2s, #0
+; CHECK-NEXT: ushll2 v4.2d, v7.4s, #0
+; CHECK-NEXT: ushll2 v5.2d, v6.4s, #0
+; CHECK-NEXT: ushll v7.2d, v7.2s, #0
+; CHECK-NEXT: ucvtf v18.2d, v20.2d
+; CHECK-NEXT: ucvtf v19.2d, v21.2d
+; CHECK-NEXT: ushll v6.2d, v6.2s, #0
+; CHECK-NEXT: ushll2 v20.2d, v17.4s, #0
+; CHECK-NEXT: ushll2 v21.2d, v16.4s, #0
+; CHECK-NEXT: ushll v17.2d, v17.2s, #0
+; CHECK-NEXT: ushll v16.2d, v16.2s, #0
+; CHECK-NEXT: ushll v22.2d, v0.2s, #0
+; CHECK-NEXT: ushll2 v23.2d, v1.4s, #0
+; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-NEXT: ucvtf v2.2d, v2.2d
+; CHECK-NEXT: ucvtf v3.2d, v3.2d
+; CHECK-NEXT: ucvtf v4.2d, v4.2d
+; CHECK-NEXT: ucvtf v5.2d, v5.2d
+; CHECK-NEXT: ucvtf v7.2d, v7.2d
+; CHECK-NEXT: ucvtf v6.2d, v6.2d
+; CHECK-NEXT: ucvtf v20.2d, v20.2d
+; CHECK-NEXT: ucvtf v21.2d, v21.2d
+; CHECK-NEXT: ucvtf v17.2d, v17.2d
+; CHECK-NEXT: ucvtf v16.2d, v16.2d
+; CHECK-NEXT: ucvtf v22.2d, v22.2d
+; CHECK-NEXT: ucvtf v23.2d, v23.2d
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: ucvtf v1.2d, v1.2d
+; CHECK-NEXT: fadd v4.2d, v18.2d, v4.2d
+; CHECK-NEXT: fadd v3.2d, v3.2d, v6.2d
+; CHECK-NEXT: fadd v2.2d, v2.2d, v7.2d
+; CHECK-NEXT: fadd v5.2d, v19.2d, v5.2d
+; CHECK-NEXT: fadd v6.2d, v16.2d, v22.2d
+; CHECK-NEXT: fadd v16.2d, v20.2d, v23.2d
+; CHECK-NEXT: fadd v7.2d, v17.2d, v1.2d
+; CHECK-NEXT: fadd v1.2d, v21.2d, v0.2d
+; CHECK-NEXT: fadd v0.2d, v3.2d, v6.2d
+; CHECK-NEXT: fadd v3.2d, v4.2d, v16.2d
+; CHECK-NEXT: fadd v1.2d, v5.2d, v1.2d
+; CHECK-NEXT: fadd v2.2d, v2.2d, v7.2d
+; CHECK-NEXT: ret
+  %s1 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+  %z1 = uitofp <8 x i16> %s1 to <8 x double> ; every 4th lane of %l starting at lane 0, converted unsigned i16 -> double
+  %s2 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+  %z2 = uitofp <8 x i16> %s2 to <8 x double> ; every 4th lane of %l starting at lane 1
+  %s3 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+  %z3 = uitofp <8 x i16> %s3 to <8 x double> ; every 4th lane of %l starting at lane 2
+  %s4 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+  %z4 = uitofp <8 x i16> %s4 to <8 x double> ; every 4th lane of %l starting at lane 3
+  %a = fadd <8 x double> %z1, %z2
+  %b = fadd <8 x double> %z3, %z4
+  %c = fadd <8 x double> %a, %b ; (z1 + z2) + (z3 + z4)
+  ret <8 x double> %c
+}
+
+define <8 x double> @uitofp_load_fadd(ptr %p) {
+; CHECK-LABEL: uitofp_load_fadd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x0]
+; CHECK-NEXT: ushll2 v4.4s, v0.8h, #0
+; CHECK-NEXT: ushll v5.4s, v0.4h, #0
+; CHECK-NEXT: ushll2 v6.4s, v1.8h, #0
+; CHECK-NEXT: ushll v7.4s, v1.4h, #0
+; CHECK-NEXT: ushll2 v16.4s, v2.8h, #0
+; CHECK-NEXT: ushll v17.4s, v2.4h, #0
+; CHECK-NEXT: ushll2 v18.4s, v3.8h, #0
+; CHECK-NEXT: ushll v0.4s, v3.4h, #0
+; CHECK-NEXT: ushll2 v1.2d, v4.4s, #0
+; CHECK-NEXT: ushll2 v2.2d, v5.4s, #0
+; CHECK-NEXT: ushll v3.2d, v4.2s, #0
+; CHECK-NEXT: ushll v4.2d, v5.2s, #0
+; CHECK-NEXT: ushll2 v5.2d, v6.4s, #0
+; CHECK-NEXT: ushll2 v19.2d, v7.4s, #0
+; CHECK-NEXT: ushll v6.2d, v6.2s, #0
+; CHECK-NEXT: ushll v7.2d, v7.2s, #0
+; CHECK-NEXT: ushll2 v20.2d, v16.4s, #0
+; CHECK-NEXT: ushll2 v21.2d, v17.4s, #0
+; CHECK-NEXT: ushll v16.2d, v16.2s, #0
+; CHECK-NEXT: ushll v17.2d, v17.2s, #0
+; CHECK-NEXT: ushll v22.2d, v0.2s, #0
+; CHECK-NEXT: ushll2 v23.2d, v18.4s, #0
+; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-NEXT: ushll v18.2d, v18.2s, #0
+; CHECK-NEXT: ucvtf v1.2d, v1.2d
+; CHECK-NEXT: ucvtf v2.2d, v2.2d
+; CHECK-NEXT: ucvtf v3.2d, v3.2d
+; CHECK-NEXT: ucvtf v4.2d, v4.2d
+; CHECK-NEXT: ucvtf v5.2d, v5.2d
+; CHECK-NEXT: ucvtf v19.2d, v19.2d
+; CHECK-NEXT: ucvtf v6.2d, v6.2d
+; CHECK-NEXT: ucvtf v7.2d, v7.2d
+; CHECK-NEXT: ucvtf v20.2d, v20.2d
+; CHECK-NEXT: ucvtf v21.2d, v21.2d
+; CHECK-NEXT: ucvtf v16.2d, v16.2d
+; CHECK-NEXT: ucvtf v17.2d, v17.2d
+; CHECK-NEXT: ucvtf v22.2d, v22.2d
+; CHECK-NEXT: ucvtf v23.2d, v23.2d
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: ucvtf v18.2d, v18.2d
+; CHECK-NEXT: fadd v1.2d, v1.2d, v5.2d
+; CHECK-NEXT: fadd v4.2d, v4.2d, v7.2d
+; CHECK-NEXT: fadd v6.2d, v3.2d, v6.2d
+; CHECK-NEXT: fadd v2.2d, v2.2d, v19.2d
+; CHECK-NEXT: fadd v3.2d, v17.2d, v22.2d
+; CHECK-NEXT: fadd v5.2d, v16.2d, v18.2d
+; CHECK-NEXT: fadd v7.2d, v21.2d, v0.2d
+; CHECK-NEXT: fadd v16.2d, v20.2d, v23.2d
+; CHECK-NEXT: fadd v0.2d, v4.2d, v3.2d
+; CHECK-NEXT: fadd v3.2d, v1.2d, v16.2d
+; CHECK-NEXT: fadd v1.2d, v2.2d, v7.2d
+; CHECK-NEXT: fadd v2.2d, v6.2d, v5.2d
+; CHECK-NEXT: ret
+  %l = load <32 x i16>, ptr %p ; source vector comes from memory; the assertions above expect one ld4 and no tbl
+  %s1 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+  %z1 = uitofp <8 x i16> %s1 to <8 x double> ; every 4th lane of %l starting at lane 0, converted unsigned i16 -> double
+  %s2 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+  %z2 = uitofp <8 x i16> %s2 to <8 x double> ; every 4th lane of %l starting at lane 1
+  %s3 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
+  %z3 = uitofp <8 x i16> %s3 to <8 x double> ; every 4th lane of %l starting at lane 2
+  %s4 = shufflevector <32 x i16> %l, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
+  %z4 = uitofp <8 x i16> %s4 to <8 x double> ; every 4th lane of %l starting at lane 3
+  %a = fadd <8 x double> %z1, %z2
+  %b = fadd <8 x double> %z3, %z4
+  %c = fadd <8 x double> %a, %b ; (z1 + z2) + (z3 + z4)
+  ret <8 x double> %c
+}
+
|