diff options
Diffstat (limited to 'llvm/test/CodeGen/AArch64/dup-ext-load-combine.ll')
| -rw-r--r-- | llvm/test/CodeGen/AArch64/dup-ext-load-combine.ll | 178 |
1 files changed, 178 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AArch64/dup-ext-load-combine.ll b/llvm/test/CodeGen/AArch64/dup-ext-load-combine.ll new file mode 100644 index 0000000..cf52934 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/dup-ext-load-combine.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +; Test optimization of DUP with extended narrow loads +; This should avoid GPR->SIMD transfers by loading directly into vector registers + +define <4 x i16> @test_dup_zextload_i8_v4i16(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i8_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: dup v0.4h, v0.h[0] +; CHECK-NEXT: ret + %load = load i8, ptr %p, align 1 + %ext = zext i8 %load to i16 + %vec = insertelement <4 x i16> poison, i16 %ext, i32 0 + %dup = shufflevector <4 x i16> %vec, <4 x i16> poison, <4 x i32> zeroinitializer + ret <4 x i16> %dup +} + +define <8 x i16> @test_dup_zextload_i8_v8i16(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i8_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %load = load i8, ptr %p, align 1 + %ext = zext i8 %load to i16 + %vec = insertelement <8 x i16> poison, i16 %ext, i32 0 + %dup = shufflevector <8 x i16> %vec, <8 x i16> poison, <8 x i32> zeroinitializer + ret <8 x i16> %dup +} + +define <2 x i32> @test_dup_zextload_i8_v2i32(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i8_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: dup v0.2s, v0.s[0] +; CHECK-NEXT: ret + %load = load i8, ptr %p, align 1 + %ext = zext i8 %load to i32 + %vec = insertelement <2 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <2 x i32> %vec, <2 x i32> poison, <2 x i32> zeroinitializer + ret <2 x i32> %dup +} + +define <4 x i32> @test_dup_zextload_i8_v4i32(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i8_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: ret + %load = load i8, ptr %p, align 1 + %ext = zext i8 %load to i32 + %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer + ret <4 x i32> %dup +} + +define <4 x i32> @test_dup_zextload_i8_v4i32_offset(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i8_v4i32_offset: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr b0, [x0, #4] +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: ret + %addr = getelementptr inbounds i8, ptr %p, i64 4 + %load = load i8, ptr %addr, align 1 + %ext = zext i8 %load to i32 + %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer + ret <4 x i32> %dup +} + +define <4 x i32> @test_dup_zextload_i8_v4i32_reg_offset(ptr %p, i64 %offset) { +; CHECK-LABEL: test_dup_zextload_i8_v4i32_reg_offset: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr b0, [x0, x1] +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: ret + %addr = getelementptr inbounds i8, ptr %p, i64 %offset + %load = load i8, ptr %addr, align 1 + %ext = zext i8 %load to i32 + %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer + ret <4 x i32> %dup +} + +define <2 x i64> @test_dup_zextload_i8_v2i64(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i8_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: dup v0.2d, v0.d[0] +; CHECK-NEXT: ret + %load = load i8, ptr %p, align 1 + %ext = zext i8 %load to i64 + %vec = insertelement <2 x i64> poison, i64 %ext, i32 0 + %dup = shufflevector <2 x i64> %vec, <2 x i64> poison, <2 x i32> zeroinitializer + ret <2 x i64> %dup +} + +define <2 x i32> @test_dup_zextload_i16_v2i32(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i16_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: dup v0.2s, v0.s[0] +; CHECK-NEXT: ret + %load = load i16, ptr %p, align 1 + %ext = zext i16 %load to i32 + %vec = insertelement <2 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <2 x i32> %vec, <2 x i32> poison, <2 x i32> zeroinitializer + ret <2 x i32> %dup +} + +define <4 x i32> @test_dup_zextload_i16_v4i32(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i16_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: ret + %load = load i16, ptr %p, align 1 + %ext = zext i16 %load to i32 + %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer + ret <4 x i32> %dup +} + +define <4 x i32> @test_dup_zextload_i16_v4i32_offset(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i16_v4i32_offset: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0, #8] +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: ret + %addr = getelementptr inbounds i16, ptr %p, i64 4 + %load = load i16, ptr %addr, align 1 + %ext = zext i16 %load to i32 + %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer + ret <4 x i32> %dup +} + +define <4 x i32> @test_dup_zextload_i16_v4i32_reg_offset(ptr %p, i64 %offset) { +; CHECK-LABEL: test_dup_zextload_i16_v4i32_reg_offset: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0, x1, lsl #1] +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: ret + %addr = getelementptr inbounds i16, ptr %p, i64 %offset + %load = load i16, ptr %addr, align 1 + %ext = zext i16 %load to i32 + %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer + ret <4 x i32> %dup +} + +define <2 x i64> @test_dup_zextload_i16_v2i64(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i16_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: dup v0.2d, v0.d[0] +; CHECK-NEXT: ret + %load = load i16, ptr %p, align 1 + %ext = zext i16 %load to i64 + %vec = insertelement <2 x i64> poison, i64 %ext, i32 0 + %dup = shufflevector <2 x i64> %vec, <2 x i64> poison, <2 x i32> zeroinitializer + ret <2 x i64> %dup +} + +define <2 x i64> @test_dup_zextload_i32_v2i64(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i32_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: dup v0.2d, v0.d[0] +; CHECK-NEXT: ret + %load = load i32, ptr %p, align 1 + %ext = zext i32 %load to i64 + %vec = insertelement <2 x i64> poison, i64 %ext, i32 0 + %dup = shufflevector <2 x i64> %vec, <2 x i64> poison, <2 x i32> zeroinitializer + ret <2 x i64> %dup +} |
