diff options
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll | 80 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/const-stov.ll | 164 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/load-and-splat.ll | 14 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll | 35 |
7 files changed, 231 insertions, 98 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index cb0c8ba..7199fac 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11595,6 +11595,15 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); SDValue Op0 = Op.getOperand(0); + EVT ValVT = Op0.getValueType(); + unsigned EltSize = Op.getValueType().getScalarSizeInBits(); + if (isa<ConstantSDNode>(Op0) && EltSize <= 32) { + int64_t IntVal = Op.getConstantOperandVal(0); + if (IntVal >= -16 && IntVal <= 15) + return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG, + dl); + } + ReuseLoadInfo RLI; if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() && Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD && @@ -11619,7 +11628,6 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Val = Op0; - EVT ValVT = Val.getValueType(); // P10 hardware store forwarding requires that a single store contains all // the data for the load. P10 is able to merge a pair of adjacent stores. Try // to avoid load hit store on P10 when running binaries compiled for older diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll index fba6725..2259b6e 100644 --- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll +++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll @@ -26,18 +26,14 @@ define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) { ; ; PWR7-LE-LABEL: build_v2i64_extload_0: ; PWR7-LE: # %bb.0: # %entry -; PWR7-LE-NEXT: li 4, 0 -; PWR7-LE-NEXT: stw 4, -16(1) -; PWR7-LE-NEXT: addis 4, 2, .LCPI0_0@toc@ha ; PWR7-LE-NEXT: lfiwzx 0, 0, 3 -; PWR7-LE-NEXT: addi 3, 1, -16 -; PWR7-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l -; PWR7-LE-NEXT: lxvd2x 1, 0, 4 -; PWR7-LE-NEXT: xxspltw 35, 0, 1 +; PWR7-LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; PWR7-LE-NEXT: xxlxor 36, 36, 36 +; PWR7-LE-NEXT: addi 3, 3, .LCPI0_0@toc@l +; PWR7-LE-NEXT: xxspltw 34, 0, 1 ; PWR7-LE-NEXT: lxvd2x 0, 0, 3 -; PWR7-LE-NEXT: xxswapd 34, 1 -; PWR7-LE-NEXT: xxswapd 36, 0 -; PWR7-LE-NEXT: vperm 2, 4, 3, 2 +; PWR7-LE-NEXT: xxswapd 35, 0 +; PWR7-LE-NEXT: vperm 2, 4, 2, 3 ; PWR7-LE-NEXT: blr ; ; PWR8-LE-LABEL: build_v2i64_extload_0: @@ -357,18 +353,14 @@ define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) { ; ; PWR7-LE-LABEL: build_v4i32_load_0: ; PWR7-LE: # %bb.0: # %entry -; PWR7-LE-NEXT: li 4, 0 -; PWR7-LE-NEXT: stw 4, -16(1) -; PWR7-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha ; PWR7-LE-NEXT: lfiwzx 0, 0, 3 -; PWR7-LE-NEXT: addi 3, 1, -16 -; PWR7-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l -; PWR7-LE-NEXT: lxvd2x 1, 0, 4 -; PWR7-LE-NEXT: xxspltw 35, 0, 1 +; PWR7-LE-NEXT: addis 3, 2, .LCPI8_0@toc@ha +; PWR7-LE-NEXT: xxlxor 36, 36, 36 +; PWR7-LE-NEXT: addi 3, 3, .LCPI8_0@toc@l +; PWR7-LE-NEXT: xxspltw 34, 0, 1 ; PWR7-LE-NEXT: lxvd2x 0, 0, 3 -; PWR7-LE-NEXT: xxswapd 34, 1 -; PWR7-LE-NEXT: xxswapd 36, 0 -; PWR7-LE-NEXT: vperm 2, 4, 3, 2 +; PWR7-LE-NEXT: xxswapd 35, 0 +; PWR7-LE-NEXT: vperm 2, 4, 2, 3 ; PWR7-LE-NEXT: blr ; ; PWR8-LE-LABEL: build_v4i32_load_0: @@ -412,18 +404,14 @@ define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) { ; ; PWR7-LE-LABEL: build_v4i32_load_1: ; PWR7-LE: # %bb.0: # %entry -; PWR7-LE-NEXT: li 4, 0 -; PWR7-LE-NEXT: stw 4, -16(1) -; PWR7-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha ; PWR7-LE-NEXT: lfiwzx 0, 0, 3 -; PWR7-LE-NEXT: addi 3, 1, -16 -; PWR7-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l -; PWR7-LE-NEXT: lxvd2x 1, 0, 4 -; PWR7-LE-NEXT: xxspltw 35, 0, 1 +; PWR7-LE-NEXT: addis 3, 2, .LCPI9_0@toc@ha +; PWR7-LE-NEXT: xxlxor 36, 36, 36 +; PWR7-LE-NEXT: addi 3, 3, .LCPI9_0@toc@l +; PWR7-LE-NEXT: xxspltw 34, 0, 1 ; PWR7-LE-NEXT: lxvd2x 0, 0, 3 -; PWR7-LE-NEXT: xxswapd 34, 1 -; PWR7-LE-NEXT: xxswapd 36, 0 -; PWR7-LE-NEXT: vperm 2, 3, 4, 2 +; PWR7-LE-NEXT: xxswapd 35, 0 +; PWR7-LE-NEXT: vperm 2, 2, 4, 3 ; PWR7-LE-NEXT: blr ; ; PWR8-LE-LABEL: build_v4i32_load_1: @@ -469,18 +457,14 @@ define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) { ; ; PWR7-LE-LABEL: build_v4i32_load_2: ; PWR7-LE: # %bb.0: # %entry -; PWR7-LE-NEXT: li 4, 0 -; PWR7-LE-NEXT: stw 4, -16(1) -; PWR7-LE-NEXT: addis 4, 2, .LCPI10_0@toc@ha ; PWR7-LE-NEXT: lfiwzx 0, 0, 3 -; PWR7-LE-NEXT: addi 3, 1, -16 -; PWR7-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l -; PWR7-LE-NEXT: lxvd2x 1, 0, 4 -; PWR7-LE-NEXT: xxspltw 35, 0, 1 +; PWR7-LE-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; PWR7-LE-NEXT: xxlxor 36, 36, 36 +; PWR7-LE-NEXT: addi 3, 3, .LCPI10_0@toc@l +; PWR7-LE-NEXT: xxspltw 34, 0, 1 ; PWR7-LE-NEXT: lxvd2x 0, 0, 3 -; PWR7-LE-NEXT: xxswapd 34, 1 -; PWR7-LE-NEXT: xxswapd 36, 0 -; PWR7-LE-NEXT: vperm 2, 3, 4, 2 +; PWR7-LE-NEXT: xxswapd 35, 0 +; PWR7-LE-NEXT: vperm 2, 2, 4, 3 ; PWR7-LE-NEXT: blr ; ; PWR8-LE-LABEL: build_v4i32_load_2: @@ -524,18 +508,14 @@ define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) { ; ; PWR7-LE-LABEL: build_v4i32_load_3: ; PWR7-LE: # %bb.0: # %entry -; PWR7-LE-NEXT: li 4, 0 -; PWR7-LE-NEXT: stw 4, -16(1) -; PWR7-LE-NEXT: addis 4, 2, .LCPI11_0@toc@ha ; PWR7-LE-NEXT: lfiwzx 0, 0, 3 -; PWR7-LE-NEXT: addi 3, 1, -16 -; PWR7-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l -; PWR7-LE-NEXT: lxvd2x 1, 0, 4 -; PWR7-LE-NEXT: xxspltw 35, 0, 1 +; PWR7-LE-NEXT: addis 3, 2, .LCPI11_0@toc@ha +; PWR7-LE-NEXT: xxlxor 36, 36, 36 +; PWR7-LE-NEXT: addi 3, 3, .LCPI11_0@toc@l +; PWR7-LE-NEXT: xxspltw 34, 0, 1 ; PWR7-LE-NEXT: lxvd2x 0, 0, 3 -; PWR7-LE-NEXT: xxswapd 34, 1 -; PWR7-LE-NEXT: xxswapd 36, 0 -; PWR7-LE-NEXT: vperm 2, 3, 4, 2 +; PWR7-LE-NEXT: xxswapd 35, 0 +; PWR7-LE-NEXT: vperm 2, 2, 4, 3 ; PWR7-LE-NEXT: blr ; ; PWR8-LE-LABEL: build_v4i32_load_3: diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll index e1159e5..7f6fdc7 100644 --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -849,16 +849,12 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(ptr nocapture rea ; ; P8-AIX-32-LABEL: no_RAUW_in_combine_during_legalize: ; P8-AIX-32: # %bb.0: # %entry -; P8-AIX-32-NEXT: li r5, 0 ; P8-AIX-32-NEXT: slwi r4, r4, 2 -; P8-AIX-32-NEXT: xxlxor v3, v3, v3 -; P8-AIX-32-NEXT: stw r5, -16(r1) +; P8-AIX-32-NEXT: xxlxor v2, v2, v2 ; P8-AIX-32-NEXT: lfiwzx f0, r3, r4 -; P8-AIX-32-NEXT: addi r3, r1, -16 -; P8-AIX-32-NEXT: lxvw4x vs1, 0, r3 ; P8-AIX-32-NEXT: xxspltw vs0, vs0, 1 -; P8-AIX-32-NEXT: xxmrghw v2, vs1, vs0 -; P8-AIX-32-NEXT: vmrghb v2, v2, v3 +; P8-AIX-32-NEXT: xxmrghw v3, v2, vs0 +; P8-AIX-32-NEXT: vmrghb v2, v3, v2 ; P8-AIX-32-NEXT: blr entry: %idx.ext = sext i32 %offset to i64 diff --git a/llvm/test/CodeGen/PowerPC/const-stov.ll b/llvm/test/CodeGen/PowerPC/const-stov.ll new file mode 100644 index 0000000..69c68a4 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/const-stov.ll @@ -0,0 +1,164 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \ +; RUN: -mtriple=powerpc64-- -mcpu=pwr7 < %s | FileCheck \ +; RUN: --check-prefix=PWR7-BE %s +; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \ +; RUN: -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck \ +; RUN: --check-prefix=PWR8-BE %s +; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs \ +; RUN: -mtriple=powerpc64le-- -mcpu=pwr8 < %s | FileCheck \ +; RUN: --check-prefix=PWR8-LE %s + +define <16 x i8> @i8(ptr nocapture noundef readonly %p) { +; PWR7-BE-LABEL: i8: +; PWR7-BE: # %bb.0: # %entry +; PWR7-BE-NEXT: lxvw4x v3, 0, r3 +; PWR7-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; PWR7-BE-NEXT: vspltisb v2, 10 +; PWR7-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; PWR7-BE-NEXT: lxvw4x v4, 0, r3 +; PWR7-BE-NEXT: vperm v2, v3, v2, v4 +; PWR7-BE-NEXT: blr +; +; PWR8-BE-LABEL: i8: +; PWR8-BE: # %bb.0: # %entry +; PWR8-BE-NEXT: lxvw4x v2, 0, r3 +; PWR8-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; PWR8-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; PWR8-BE-NEXT: lxvw4x v3, 0, r3 +; PWR8-BE-NEXT: li r3, 10 +; PWR8-BE-NEXT: mtvsrwz v4, r3 +; PWR8-BE-NEXT: vperm v2, v2, v4, v3 +; PWR8-BE-NEXT: blr +; +; PWR8-LE-LABEL: i8: +; PWR8-LE: # %bb.0: # %entry +; PWR8-LE-NEXT: lxvd2x vs0, 0, r3 +; PWR8-LE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; PWR8-LE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; PWR8-LE-NEXT: xxswapd v2, vs0 +; PWR8-LE-NEXT: lxvd2x vs0, 0, r3 +; PWR8-LE-NEXT: li r3, 10 +; PWR8-LE-NEXT: mtvsrd v4, r3 +; PWR8-LE-NEXT: xxswapd v3, vs0 +; PWR8-LE-NEXT: vperm v2, v4, v2, v3 +; PWR8-LE-NEXT: blr +entry: + %0 = load <16 x i8>, ptr %p, align 16 + %vecinit1 = insertelement <16 x i8> %0, i8 10, i64 1 + ret <16 x i8> %vecinit1 +} + +define <8 x i16> @i16(ptr nocapture noundef readonly %p) { +; PWR7-BE-LABEL: i16: +; PWR7-BE: # %bb.0: # %entry +; PWR7-BE-NEXT: lxvw4x v3, 0, r3 +; PWR7-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; PWR7-BE-NEXT: vspltish v2, 9 +; PWR7-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l +; PWR7-BE-NEXT: lxvw4x v4, 0, r3 +; PWR7-BE-NEXT: vperm v2, v3, v2, v4 +; PWR7-BE-NEXT: blr +; +; PWR8-BE-LABEL: i16: +; PWR8-BE: # %bb.0: # %entry +; PWR8-BE-NEXT: lxvw4x v2, 0, r3 +; PWR8-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; PWR8-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l +; PWR8-BE-NEXT: lxvw4x v3, 0, r3 +; PWR8-BE-NEXT: li r3, 9 +; PWR8-BE-NEXT: mtvsrwz v4, r3 +; PWR8-BE-NEXT: vperm v2, v2, v4, v3 +; PWR8-BE-NEXT: blr +; +; PWR8-LE-LABEL: i16: +; PWR8-LE: # %bb.0: # %entry +; PWR8-LE-NEXT: lxvd2x vs0, 0, r3 +; PWR8-LE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; PWR8-LE-NEXT: addi r3, r3, .LCPI1_0@toc@l +; PWR8-LE-NEXT: xxswapd v2, vs0 +; PWR8-LE-NEXT: lxvd2x vs0, 0, r3 +; PWR8-LE-NEXT: li r3, 9 +; PWR8-LE-NEXT: mtvsrd v4, r3 +; PWR8-LE-NEXT: xxswapd v3, vs0 +; PWR8-LE-NEXT: vperm v2, v4, v2, v3 +; PWR8-LE-NEXT: blr +entry: + %0 = load <8 x i16>, ptr %p, align 16 + %vecinit1 = insertelement <8 x i16> %0, i16 9, i64 1 + ret <8 x i16> %vecinit1 +} + +define <4 x i32> @i32(ptr nocapture noundef readonly %p) { +; PWR7-BE-LABEL: i32: +; PWR7-BE: # %bb.0: # %entry +; PWR7-BE-NEXT: lxvw4x v3, 0, r3 +; PWR7-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; PWR7-BE-NEXT: vspltisw v2, 7 +; PWR7-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l +; PWR7-BE-NEXT: lxvw4x v4, 0, r3 +; PWR7-BE-NEXT: vperm v2, v3, v2, v4 +; PWR7-BE-NEXT: blr +; +; PWR8-BE-LABEL: i32: +; PWR8-BE: # %bb.0: # %entry +; PWR8-BE-NEXT: lxvw4x v2, 0, r3 +; PWR8-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; PWR8-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l +; PWR8-BE-NEXT: lxvw4x v3, 0, r3 +; PWR8-BE-NEXT: li r3, 7 +; PWR8-BE-NEXT: mtvsrwz v4, r3 +; PWR8-BE-NEXT: vperm v2, v2, v4, v3 +; PWR8-BE-NEXT: blr +; +; PWR8-LE-LABEL: i32: +; PWR8-LE: # %bb.0: # %entry +; PWR8-LE-NEXT: lxvd2x vs0, 0, r3 +; PWR8-LE-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; PWR8-LE-NEXT: addi r3, r3, .LCPI2_0@toc@l +; PWR8-LE-NEXT: xxswapd v2, vs0 +; PWR8-LE-NEXT: lxvd2x vs0, 0, r3 +; PWR8-LE-NEXT: li r3, 7 +; PWR8-LE-NEXT: mtvsrwz v4, r3 +; PWR8-LE-NEXT: xxswapd v3, vs0 +; PWR8-LE-NEXT: vperm v2, v4, v2, v3 +; PWR8-LE-NEXT: blr +entry: + %0 = load <4 x i32>, ptr %p, align 16 + %vecinit1 = insertelement <4 x i32> %0, i32 7, i64 1 + ret <4 x i32> %vecinit1 +} + +define <2 x i64> @i64(ptr nocapture noundef readonly %p) { +; PWR7-BE-LABEL: i64: +; PWR7-BE: # %bb.0: # %entry +; PWR7-BE-NEXT: lxvd2x v2, 0, r3 +; PWR7-BE-NEXT: li r3, 10 +; PWR7-BE-NEXT: std r3, -16(r1) +; PWR7-BE-NEXT: std r3, -8(r1) +; PWR7-BE-NEXT: addi r3, r1, -16 +; PWR7-BE-NEXT: lxvd2x v3, 0, r3 +; PWR7-BE-NEXT: xxmrghd v2, v2, v3 +; PWR7-BE-NEXT: blr +; +; PWR8-BE-LABEL: i64: +; PWR8-BE: # %bb.0: # %entry +; PWR8-BE-NEXT: lxvd2x v2, 0, r3 +; PWR8-BE-NEXT: li r3, 10 +; PWR8-BE-NEXT: mtfprd f0, r3 +; PWR8-BE-NEXT: xxmrghd v2, v2, vs0 +; PWR8-BE-NEXT: blr +; +; PWR8-LE-LABEL: i64: +; PWR8-LE: # %bb.0: # %entry +; PWR8-LE-NEXT: lxvd2x vs0, 0, r3 +; PWR8-LE-NEXT: li r3, 10 +; PWR8-LE-NEXT: xxswapd v2, vs0 +; PWR8-LE-NEXT: mtfprd f0, r3 +; PWR8-LE-NEXT: xxpermdi v2, vs0, v2, 1 +; PWR8-LE-NEXT: blr +entry: + %0 = load <2 x i64>, ptr %p, align 16 + %vecinit1 = insertelement <2 x i64> %0, i64 10, i64 1 + ret <2 x i64> %vecinit1 +} diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll index c9ee3a5..1993b16 100644 --- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll +++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll @@ -356,11 +356,9 @@ define void @test6(ptr %a, ptr %in) { ; ; P9-AIX32-LABEL: test6: ; P9-AIX32: # %bb.0: # %entry -; P9-AIX32-NEXT: li r5, 0 -; P9-AIX32-NEXT: stw r5, -16(r1) ; P9-AIX32-NEXT: lwz r5, L..C2(r2) # %const.0 ; P9-AIX32-NEXT: lxvwsx vs1, 0, r4 -; P9-AIX32-NEXT: lxv vs2, -16(r1) +; P9-AIX32-NEXT: xxlxor vs2, vs2, vs2 ; P9-AIX32-NEXT: lxv vs0, 0(r5) ; P9-AIX32-NEXT: xxperm vs1, vs2, vs0 ; P9-AIX32-NEXT: stxv vs1, 0(r3) @@ -368,13 +366,10 @@ define void @test6(ptr %a, ptr %in) { ; ; P8-AIX32-LABEL: test6: ; P8-AIX32: # %bb.0: # %entry -; P8-AIX32-NEXT: li r5, 0 -; P8-AIX32-NEXT: stw r5, -16(r1) ; P8-AIX32-NEXT: lfiwzx f0, 0, r4 ; P8-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0 +; P8-AIX32-NEXT: xxlxor v4, v4, v4 ; P8-AIX32-NEXT: lxvw4x v3, 0, r4 -; P8-AIX32-NEXT: addi r4, r1, -16 -; P8-AIX32-NEXT: lxvw4x v4, 0, r4 ; P8-AIX32-NEXT: xxspltw v2, vs0, 1 ; P8-AIX32-NEXT: vperm v2, v4, v2, v3 ; P8-AIX32-NEXT: stxvw4x v2, 0, r3 @@ -382,13 +377,10 @@ define void @test6(ptr %a, ptr %in) { ; ; P7-AIX32-LABEL: test6: ; P7-AIX32: # %bb.0: # %entry -; P7-AIX32-NEXT: li r5, 0 -; P7-AIX32-NEXT: stw r5, -16(r1) ; P7-AIX32-NEXT: lfiwzx f0, 0, r4 ; P7-AIX32-NEXT: lwz r4, L..C2(r2) # %const.0 +; P7-AIX32-NEXT: xxlxor v4, v4, v4 ; P7-AIX32-NEXT: lxvw4x v3, 0, r4 -; P7-AIX32-NEXT: addi r4, r1, -16 -; P7-AIX32-NEXT: lxvw4x v4, 0, r4 ; P7-AIX32-NEXT: xxspltw v2, vs0, 1 ; P7-AIX32-NEXT: vperm v2, v4, v2, v3 ; P7-AIX32-NEXT: stxvw4x v2, 0, r3 diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll index ad6a576..04e7110 100644 --- a/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll +++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll @@ -60,15 +60,13 @@ define hidden void @function1() { ; CHECK-LINUX-32: # %bb.0: # %entry ; CHECK-LINUX-32-NEXT: mflr r0 ; CHECK-LINUX-32-NEXT: stw r0, 4(r1) -; CHECK-LINUX-32-NEXT: stwu r1, -48(r1) -; CHECK-LINUX-32-NEXT: .cfi_def_cfa_offset 48 +; CHECK-LINUX-32-NEXT: stwu r1, -32(r1) +; CHECK-LINUX-32-NEXT: .cfi_def_cfa_offset 32 ; CHECK-LINUX-32-NEXT: .cfi_offset lr, 4 ; CHECK-LINUX-32-NEXT: bl call1 -; CHECK-LINUX-32-NEXT: li r4, 0 ; CHECK-LINUX-32-NEXT: stw r3, 16(r1) -; CHECK-LINUX-32-NEXT: stw r4, 32(r1) -; CHECK-LINUX-32-NEXT: lwz r0, 52(r1) -; CHECK-LINUX-32-NEXT: addi r1, r1, 48 +; CHECK-LINUX-32-NEXT: lwz r0, 36(r1) +; CHECK-LINUX-32-NEXT: addi r1, r1, 32 ; CHECK-LINUX-32-NEXT: mtlr r0 ; CHECK-LINUX-32-NEXT: blr ; @@ -76,13 +74,11 @@ define hidden void @function1() { ; CHECK-AIX-32: # %bb.0: # %entry ; CHECK-AIX-32-NEXT: mflr r0 ; CHECK-AIX-32-NEXT: stw r0, 8(r1) -; CHECK-AIX-32-NEXT: stwu r1, -96(r1) +; CHECK-AIX-32-NEXT: stwu r1, -80(r1) ; CHECK-AIX-32-NEXT: bl .call1[PR] ; CHECK-AIX-32-NEXT: nop -; CHECK-AIX-32-NEXT: li r4, 0 ; CHECK-AIX-32-NEXT: stw r3, 64(r1) -; CHECK-AIX-32-NEXT: stw r4, 80(r1) -; CHECK-AIX-32-NEXT: addi r1, r1, 96 +; CHECK-AIX-32-NEXT: addi r1, r1, 80 ; CHECK-AIX-32-NEXT: lwz r0, 8(r1) ; CHECK-AIX-32-NEXT: mtlr r0 ; CHECK-AIX-32-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll b/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll index 0171e27..35b4780 100644 --- a/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll +++ b/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll @@ -8,30 +8,27 @@ define dso_local fastcc void @BuildVectorICE() unnamed_addr { ; 32BIT-LABEL: BuildVectorICE: ; 32BIT: # %bb.0: # %entry -; 32BIT-NEXT: stwu 1, -64(1) -; 32BIT-NEXT: .cfi_def_cfa_offset 64 -; 32BIT-NEXT: li 4, .LCPI0_0@l -; 32BIT-NEXT: lis 5, .LCPI0_0@ha +; 32BIT-NEXT: stwu 1, -48(1) +; 32BIT-NEXT: .cfi_def_cfa_offset 48 ; 32BIT-NEXT: lxvw4x 34, 0, 3 -; 32BIT-NEXT: li 3, 0 -; 32BIT-NEXT: addi 6, 1, 48 -; 32BIT-NEXT: li 7, 0 -; 32BIT-NEXT: lxvw4x 35, 5, 4 +; 32BIT-NEXT: li 3, .LCPI0_0@l +; 32BIT-NEXT: lis 4, .LCPI0_0@ha +; 32BIT-NEXT: li 5, 0 +; 32BIT-NEXT: xxlxor 36, 36, 36 +; 32BIT-NEXT: lxvw4x 35, 4, 3 +; 32BIT-NEXT: addi 3, 1, 16 ; 32BIT-NEXT: addi 4, 1, 32 -; 32BIT-NEXT: addi 5, 1, 16 ; 32BIT-NEXT: .p2align 4 ; 32BIT-NEXT: .LBB0_1: # %while.body ; 32BIT-NEXT: # -; 32BIT-NEXT: stw 3, 32(1) -; 32BIT-NEXT: stw 7, 16(1) -; 32BIT-NEXT: lxvw4x 36, 0, 4 -; 32BIT-NEXT: lxvw4x 37, 0, 5 -; 32BIT-NEXT: vperm 4, 5, 4, 3 -; 32BIT-NEXT: vadduwm 4, 2, 4 -; 32BIT-NEXT: xxspltw 37, 36, 1 -; 32BIT-NEXT: vadduwm 4, 4, 5 -; 32BIT-NEXT: stxvw4x 36, 0, 6 -; 32BIT-NEXT: lwz 7, 48(1) +; 32BIT-NEXT: stw 5, 16(1) +; 32BIT-NEXT: lxvw4x 37, 0, 3 +; 32BIT-NEXT: vperm 5, 5, 4, 3 +; 32BIT-NEXT: vadduwm 5, 2, 5 +; 32BIT-NEXT: xxspltw 32, 37, 1 +; 32BIT-NEXT: vadduwm 5, 5, 0 +; 32BIT-NEXT: stxvw4x 37, 0, 4 +; 32BIT-NEXT: lwz 5, 32(1) ; 32BIT-NEXT: b .LBB0_1 ; ; 64BIT-LABEL: BuildVectorICE: |