aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AArch64/dup-ext-load-combine.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AArch64/dup-ext-load-combine.ll')
-rw-r--r--llvm/test/CodeGen/AArch64/dup-ext-load-combine.ll178
1 files changed, 178 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AArch64/dup-ext-load-combine.ll b/llvm/test/CodeGen/AArch64/dup-ext-load-combine.ll
new file mode 100644
index 0000000..cf52934
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/dup-ext-load-combine.ll
@@ -0,0 +1,178 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; Test optimization of DUP with extended narrow loads
+; This should avoid GPR->SIMD transfers by loading directly into vector registers
+
+define <4 x i16> @test_dup_zextload_i8_v4i16(ptr %p) {
+; CHECK-LABEL: test_dup_zextload_i8_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0]
+; CHECK-NEXT: dup v0.4h, v0.h[0]
+; CHECK-NEXT: ret
+ %load = load i8, ptr %p, align 1
+ %ext = zext i8 %load to i16
+ %vec = insertelement <4 x i16> poison, i16 %ext, i32 0
+ %dup = shufflevector <4 x i16> %vec, <4 x i16> poison, <4 x i32> zeroinitializer
+ ret <4 x i16> %dup
+}
+
+define <8 x i16> @test_dup_zextload_i8_v8i16(ptr %p) {
+; CHECK-LABEL: test_dup_zextload_i8_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0]
+; CHECK-NEXT: dup v0.8h, v0.h[0]
+; CHECK-NEXT: ret
+ %load = load i8, ptr %p, align 1
+ %ext = zext i8 %load to i16
+ %vec = insertelement <8 x i16> poison, i16 %ext, i32 0
+ %dup = shufflevector <8 x i16> %vec, <8 x i16> poison, <8 x i32> zeroinitializer
+ ret <8 x i16> %dup
+}
+
+define <2 x i32> @test_dup_zextload_i8_v2i32(ptr %p) {
+; CHECK-LABEL: test_dup_zextload_i8_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0]
+; CHECK-NEXT: dup v0.2s, v0.s[0]
+; CHECK-NEXT: ret
+ %load = load i8, ptr %p, align 1
+ %ext = zext i8 %load to i32
+ %vec = insertelement <2 x i32> poison, i32 %ext, i32 0
+ %dup = shufflevector <2 x i32> %vec, <2 x i32> poison, <2 x i32> zeroinitializer
+ ret <2 x i32> %dup
+}
+
+define <4 x i32> @test_dup_zextload_i8_v4i32(ptr %p) {
+; CHECK-LABEL: test_dup_zextload_i8_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0]
+; CHECK-NEXT: dup v0.4s, v0.s[0]
+; CHECK-NEXT: ret
+ %load = load i8, ptr %p, align 1
+ %ext = zext i8 %load to i32
+ %vec = insertelement <4 x i32> poison, i32 %ext, i32 0
+ %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer
+ ret <4 x i32> %dup
+}
+
+define <4 x i32> @test_dup_zextload_i8_v4i32_offset(ptr %p) {
+; CHECK-LABEL: test_dup_zextload_i8_v4i32_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0, #4]
+; CHECK-NEXT: dup v0.4s, v0.s[0]
+; CHECK-NEXT: ret
+ %addr = getelementptr inbounds i8, ptr %p, i64 4
+ %load = load i8, ptr %addr, align 1
+ %ext = zext i8 %load to i32
+ %vec = insertelement <4 x i32> poison, i32 %ext, i32 0
+ %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer
+ ret <4 x i32> %dup
+}
+
+define <4 x i32> @test_dup_zextload_i8_v4i32_reg_offset(ptr %p, i64 %offset) {
+; CHECK-LABEL: test_dup_zextload_i8_v4i32_reg_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0, x1]
+; CHECK-NEXT: dup v0.4s, v0.s[0]
+; CHECK-NEXT: ret
+ %addr = getelementptr inbounds i8, ptr %p, i64 %offset
+ %load = load i8, ptr %addr, align 1
+ %ext = zext i8 %load to i32
+ %vec = insertelement <4 x i32> poison, i32 %ext, i32 0
+ %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer
+ ret <4 x i32> %dup
+}
+
+define <2 x i64> @test_dup_zextload_i8_v2i64(ptr %p) {
+; CHECK-LABEL: test_dup_zextload_i8_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr b0, [x0]
+; CHECK-NEXT: dup v0.2d, v0.d[0]
+; CHECK-NEXT: ret
+ %load = load i8, ptr %p, align 1
+ %ext = zext i8 %load to i64
+ %vec = insertelement <2 x i64> poison, i64 %ext, i32 0
+ %dup = shufflevector <2 x i64> %vec, <2 x i64> poison, <2 x i32> zeroinitializer
+ ret <2 x i64> %dup
+}
+
+define <2 x i32> @test_dup_zextload_i16_v2i32(ptr %p) {
+; CHECK-LABEL: test_dup_zextload_i16_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: dup v0.2s, v0.s[0]
+; CHECK-NEXT: ret
+ %load = load i16, ptr %p, align 1
+ %ext = zext i16 %load to i32
+ %vec = insertelement <2 x i32> poison, i32 %ext, i32 0
+ %dup = shufflevector <2 x i32> %vec, <2 x i32> poison, <2 x i32> zeroinitializer
+ ret <2 x i32> %dup
+}
+
+define <4 x i32> @test_dup_zextload_i16_v4i32(ptr %p) {
+; CHECK-LABEL: test_dup_zextload_i16_v4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: dup v0.4s, v0.s[0]
+; CHECK-NEXT: ret
+ %load = load i16, ptr %p, align 1
+ %ext = zext i16 %load to i32
+ %vec = insertelement <4 x i32> poison, i32 %ext, i32 0
+ %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer
+ ret <4 x i32> %dup
+}
+
+define <4 x i32> @test_dup_zextload_i16_v4i32_offset(ptr %p) {
+; CHECK-LABEL: test_dup_zextload_i16_v4i32_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, #8]
+; CHECK-NEXT: dup v0.4s, v0.s[0]
+; CHECK-NEXT: ret
+ %addr = getelementptr inbounds i16, ptr %p, i64 4
+ %load = load i16, ptr %addr, align 1
+ %ext = zext i16 %load to i32
+ %vec = insertelement <4 x i32> poison, i32 %ext, i32 0
+ %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer
+ ret <4 x i32> %dup
+}
+
+define <4 x i32> @test_dup_zextload_i16_v4i32_reg_offset(ptr %p, i64 %offset) {
+; CHECK-LABEL: test_dup_zextload_i16_v4i32_reg_offset:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
+; CHECK-NEXT: dup v0.4s, v0.s[0]
+; CHECK-NEXT: ret
+ %addr = getelementptr inbounds i16, ptr %p, i64 %offset
+ %load = load i16, ptr %addr, align 1
+ %ext = zext i16 %load to i32
+ %vec = insertelement <4 x i32> poison, i32 %ext, i32 0
+ %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer
+ ret <4 x i32> %dup
+}
+
+define <2 x i64> @test_dup_zextload_i16_v2i64(ptr %p) {
+; CHECK-LABEL: test_dup_zextload_i16_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: dup v0.2d, v0.d[0]
+; CHECK-NEXT: ret
+ %load = load i16, ptr %p, align 1
+ %ext = zext i16 %load to i64
+ %vec = insertelement <2 x i64> poison, i64 %ext, i32 0
+ %dup = shufflevector <2 x i64> %vec, <2 x i64> poison, <2 x i32> zeroinitializer
+ ret <2 x i64> %dup
+}
+
+define <2 x i64> @test_dup_zextload_i32_v2i64(ptr %p) {
+; CHECK-LABEL: test_dup_zextload_i32_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: dup v0.2d, v0.d[0]
+; CHECK-NEXT: ret
+ %load = load i32, ptr %p, align 1
+ %ext = zext i32 %load to i64
+ %vec = insertelement <2 x i64> poison, i64 %ext, i32 0
+ %dup = shufflevector <2 x i64> %vec, <2 x i64> poison, <2 x i32> zeroinitializer
+ ret <2 x i64> %dup
+}