author     Diana Picus <Diana-Magda.Picus@amd.com>    2025-09-26 10:03:13 +0200
committer  GitHub <noreply@github.com>                2025-09-26 10:03:13 +0200
commit     b8375c5824dcb2bf6f1a2b2cd6824128fa66932a (patch)
tree       7e2a954d77bd7675a775761f8b27c0bfdeb1489e
parent     82d978a5b7e400289323bf75987ac9bcf29ef668 (diff)
parent     3257dc35fe9ed872788e90c948cb4bb593b8fa05 (diff)
Merge branch 'main' into users/rovka/remove-dvgpr-target-features
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp                         2
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp                             8
-rw-r--r--  llvm/lib/Target/ARM/ARMSubtarget.cpp                                3
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp                39
-rw-r--r--  llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir                     1
-rw-r--r--  llvm/test/CodeGen/ARM/fnegs.ll                                     19
-rw-r--r--  llvm/test/CodeGen/ARM/fnmscs.ll                                    36
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll       9
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll 48
-rw-r--r--  llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll         4
10 files changed, 91 insertions, 78 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 7ec9885..f291191 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -2730,7 +2730,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
       HasVMemStore = true;
     }
     for (const MachineOperand &Op : MI.all_uses()) {
-      if (!TRI->isVectorRegister(*MRI, Op.getReg()))
+      if (Op.isDebug() || !TRI->isVectorRegister(*MRI, Op.getReg()))
         continue;
       RegInterval Interval = Brackets.getRegInterval(&MI, MRI, TRI, Op);
       // Vgpr use
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 9a247bb..78b7066 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -5573,7 +5573,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
     llvm_unreachable("Unknown VFP cmp argument!");
 }
 
-/// OptimizeVFPBrcond - With nnan, it's legal to optimize some
+/// OptimizeVFPBrcond - With nnan and without daz, it's legal to optimize some
 /// f32 and even f64 comparisons to integer ones.
 SDValue ARMTargetLowering::OptimizeVFPBrcond(SDValue Op,
                                              SelectionDAG &DAG) const {
@@ -5729,9 +5729,9 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   }
 
   SDNodeFlags Flags = Op->getFlags();
-  if ((getTargetMachine().Options.UnsafeFPMath || Flags.hasNoNaNs()) &&
-      (DAG.getDenormalMode(MVT::f32) == DenormalMode::getIEEE() &&
-       DAG.getDenormalMode(MVT::f64) == DenormalMode::getIEEE()) &&
+  if (Flags.hasNoNaNs() &&
+      DAG.getDenormalMode(MVT::f32) == DenormalMode::getIEEE() &&
+      DAG.getDenormalMode(MVT::f64) == DenormalMode::getIEEE() &&
       (CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETNE ||
        CC == ISD::SETUNE)) {
     if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index f059294..3329bea 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -222,8 +222,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
   const FeatureBitset &Bits = getFeatureBits();
   if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters
-      (Options.UnsafeFPMath || isTargetDarwin() ||
-       DM == DenormalMode::getPreserveSign()))
+      (isTargetDarwin() || DM == DenormalMode::getPreserveSign()))
     HasNEONForFP = true;
 
   if (isRWPI())
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 7bf6493..5d4a8fd 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1603,7 +1603,7 @@ static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
 /// value is necessary in order to fit the above form.
 static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL,
                                             ArrayRef<int> Mask, MVT VT,
-                                            SDValue V1, SDValue V2, SelectionDAG &DAG,
+                                            SDValue V1, SelectionDAG &DAG,
                                             const LoongArchSubtarget &Subtarget) {
   int SplatIndex = -1;
   for (const auto &M : Mask) {
@@ -1996,8 +1996,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   SDValue Result;
   // TODO: Add more comparison patterns.
   if (V2.isUndef()) {
-    if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG,
-                                               Subtarget)))
+    if ((Result =
+             lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
       return Result;
     if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG,
                                               Subtarget)))
@@ -2053,7 +2053,7 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
 /// value is necessary in order to fit the above form.
 static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
                                              ArrayRef<int> Mask, MVT VT,
-                                             SDValue V1, SDValue V2, SelectionDAG &DAG,
+                                             SDValue V1, SelectionDAG &DAG,
                                              const LoongArchSubtarget &Subtarget) {
   int SplatIndex = -1;
   for (const auto &M : Mask) {
@@ -2096,10 +2096,29 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
 }
 
+/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
+static SDValue
+lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                            SDValue V1, SelectionDAG &DAG,
+                            const LoongArchSubtarget &Subtarget) {
+  // Only consider XVPERMI_D.
+  if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
+    return SDValue();
+
+  unsigned MaskImm = 0;
+  for (unsigned i = 0; i < Mask.size(); ++i) {
+    if (Mask[i] == -1)
+      continue;
+    MaskImm |= Mask[i] << (i * 2);
+  }
+
+  return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
+                     DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
+}
+
 /// Lower VECTOR_SHUFFLE into XVPERM (if possible).
 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
-                                          MVT VT, SDValue V1, SDValue V2,
-                                          SelectionDAG &DAG,
+                                          MVT VT, SDValue V1, SelectionDAG &DAG,
                                           const LoongArchSubtarget &Subtarget) {
   // LoongArch LASX only have XVPERM_W.
   if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
@@ -2540,14 +2559,16 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
   SDValue Result;
   // TODO: Add more comparison patterns.
   if (V2.isUndef()) {
-    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, V2, DAG,
-                                                Subtarget)))
+    if ((Result =
+             lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
       return Result;
     if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
                                                Subtarget)))
       return Result;
     if ((Result =
-             lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG, Subtarget)))
+             lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
       return Result;
 
     // TODO: This comment may be enabled in the future to better match the
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
index 0ddd2aa..0d54bfa 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s -debugify-and-strip-all-safe | FileCheck -check-prefix=GFX9 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX10 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX12 %s
diff --git a/llvm/test/CodeGen/ARM/fnegs.ll b/llvm/test/CodeGen/ARM/fnegs.ll
index 435a600..6055b8f 100644
--- a/llvm/test/CodeGen/ARM/fnegs.ll
+++ b/llvm/test/CodeGen/ARM/fnegs.ll
@@ -10,11 +10,11 @@
 ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
 ; RUN:   | FileCheck %s -check-prefix=CORTEXA8
 
-; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --denormal-fp-math=preserve-sign %s -o - \
 ; RUN:   | FileCheck %s -check-prefix=CORTEXA8U
 
 ; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
-; RUN:   | FileCheck %s -check-prefix=CORTEXA8U
+; RUN:   | FileCheck %s -check-prefix=CORTEXA8U-DARWIN
 
 ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - \
 ; RUN:   | FileCheck %s -check-prefix=CORTEXA9
@@ -41,7 +41,10 @@ entry:
 ; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}}
 
 ; CORTEXA8U-LABEL: test1:
-; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}}
+; CORTEXA8U: vsub.f32 d{{.*}}, d{{.*}}, d{{.*}}
+
+; CORTEXA8U-DARWIN-LABEL: test1:
+; CORTEXA8U-DARWIN: vneg.f32 d{{.*}}, d{{.*}}
 
 ; CORTEXA9-LABEL: test1:
 ; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}}
@@ -110,9 +113,13 @@ define <2 x float> @fneg_bitcast(i64 %i) {
 ; CORTEXA8-NOT: vneg.f32
 
 ; CORTEXA8U-LABEL: fneg_bitcast:
-; CORTEXA8U-DAG: eor r0, r0, #-2147483648
-; CORTEXA8U-DAG: eor r1, r1, #-2147483648
-; CORTEXA8U-NOT: vneg.f32
+; CORTEXA8U-DAG: vmov.i32 d{{.*}}, #0x80000000
+; CORTEXA8U-DAG: vsub.f32 d{{.*}}, d{{.*}}, d{{.*}}
+
+; CORTEXA8U-DARWIN-LABEL: fneg_bitcast:
+; CORTEXA8U-DARWIN-DAG: eor r0, r0, #-2147483648
+; CORTEXA8U-DARWIN-DAG: eor r1, r1, #-2147483648
+; CORTEXA8U-DARWIN-NOT: vneg.f32
 
 ; CORTEXA9-LABEL: fneg_bitcast:
 ; CORTEXA9-DAG: eor r0, r0, #-2147483648
diff --git a/llvm/test/CodeGen/ARM/fnmscs.ll b/llvm/test/CodeGen/ARM/fnmscs.ll
index 0fa878c..49f9dcf 100644
--- a/llvm/test/CodeGen/ARM/fnmscs.ll
+++ b/llvm/test/CodeGen/ARM/fnmscs.ll
@@ -13,11 +13,11 @@
 ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -regalloc=basic %s -o - \
 ; RUN:   | FileCheck %s -check-prefix=A8
 
-; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math %s -o - \
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 --denormal-fp-math=preserve-sign %s -o - \
 ; RUN:   | FileCheck %s -check-prefix=A8U
 
 ; RUN: llc -mtriple=arm-darwin -mcpu=cortex-a8 %s -o - \
-; RUN:   | FileCheck %s -check-prefix=A8U
+; RUN:   | FileCheck %s -check-prefix=A8U-DARWIN
 
 define float @t1(float %acc, float %a, float %b) nounwind {
 entry:
@@ -31,15 +31,20 @@ entry:
 ; NEON: vnmla.f32
 
 ; A8U-LABEL: t1:
-; A8U: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
-; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+; A8U: vmov.i32 d{{[0-9]+}}, #0x80000000
+; A8U: vsub.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; A8U: vsub.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+
+; A8U-DARWIN-LABEL: t1:
+; A8U-DARWIN: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
+; A8U-DARWIN: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
 
 ; A8-LABEL: t1:
 ; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
 ; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
   %0 = fmul float %a, %b
   %1 = fsub float -0.0, %0
-  %2 = fsub float %1, %acc
+  %2 = fsub float %1, %acc
   ret float %2
 }
 
@@ -55,8 +60,13 @@ entry:
 ; NEON: vnmla.f32
 
 ; A8U-LABEL: t2:
-; A8U: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
-; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
+; A8U: vmov.i32 d{{[0-9]+}}, #0x80000000
+; A8U: vsub.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; A8U: vsub.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+
+; A8U-DARWIN-LABEL: t2:
+; A8U-DARWIN: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
+; A8U-DARWIN: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
 
 ; A8-LABEL: t2:
 ; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}}
@@ -79,8 +89,12 @@ entry:
 ; NEON: vnmla.f64
 
 ; A8U-LABEL: t3:
-; A8U: vnmul.f64 d
 ; A8U: vsub.f64 d
+; A8U: vsub.f64 d
+
+; A8U-DARWIN-LABEL: t3:
+; A8U-DARWIN: vnmul.f64 d
+; A8U-DARWIN: vsub.f64 d
 
 ; A8-LABEL: t3:
 ; A8: vnmul.f64 d
@@ -103,8 +117,12 @@ entry:
 ; NEON: vnmla.f64
 
 ; A8U-LABEL: t4:
-; A8U: vnmul.f64 d
 ; A8U: vsub.f64 d
+; A8U: vsub.f64 d
+
+; A8U-DARWIN-LABEL: t4:
+; A8U-DARWIN: vnmul.f64 d
+; A8U-DARWIN: vsub.f64 d
 
 ; A8-LABEL: t4:
 ; A8: vnmul.f64 d
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
index 3053942..0b8015d 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
@@ -7,13 +7,12 @@
 define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: shufflevector_v4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvpickve.d $xr2, $xr1, 3
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 238
-; CHECK-NEXT:    xvrepl128vei.d $xr3, $xr3, 1
-; CHECK-NEXT:    vextrins.d $vr3, $vr2, 16
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 3
+; CHECK-NEXT:    xvpickve.d $xr3, $xr1, 3
+; CHECK-NEXT:    vextrins.d $vr2, $vr3, 16
 ; CHECK-NEXT:    xvpickve.d $xr1, $xr1, 2
 ; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16
-; CHECK-NEXT:    xvpermi.q $xr0, $xr3, 2
+; CHECK-NEXT:    xvpermi.q $xr0, $xr2, 2
 ; CHECK-NEXT:    ret
 entry:
   %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 6, i32 3, i32 7>
diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
index 24f1b31..245f764 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
@@ -6,11 +6,8 @@ define <32 x i8> @shuffle_v32i8(<32 x i8> %a) {
 ; CHECK-LABEL: shuffle_v32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI0_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI0_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
+; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI0_0)
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 78
 ; CHECK-NEXT:    xvshuf.h $xr1, $xr2, $xr0
 ; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
 ; CHECK-NEXT:    ret
@@ -34,11 +31,8 @@ define <16 x i16> @shuffle_v16i16(<16 x i16> %a) {
 ; CHECK-LABEL: shuffle_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI2_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
+; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT:    xvpermi.d $xr2, $xr0, 78
 ; CHECK-NEXT:    xvshuf.w $xr1, $xr2, $xr0
 ; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
 ; CHECK-NEXT:    ret
@@ -72,10 +66,7 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) {
 define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
 ; CHECK-LABEL: shuffle_v8i32_same_lane:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT:    xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 225
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i32> %shuffle
@@ -84,14 +75,7 @@ define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
 define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
 ; CHECK-LABEL: shuffle_v4i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI6_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI6_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT:    xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 39
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
   ret <4 x i64> %shuffle
@@ -100,10 +84,7 @@ define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
 define <4 x i64> @shuffle_v4i64_same_lane(<4 x i64> %a) {
 ; CHECK-LABEL: shuffle_v4i64_same_lane:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT:    xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 225
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
   ret <4 x i64> %shuffle
@@ -136,14 +117,7 @@ define <8 x float> @shuffle_v8f32_same_lane(<8 x float> %a) {
 define <4 x double> @shuffle_v4f64(<4 x double> %a) {
 ; CHECK-LABEL: shuffle_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI10_1)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI10_1)
-; CHECK-NEXT:    xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT:    xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 39
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
   ret <4 x double> %shuffle
@@ -152,11 +126,7 @@ define <4 x double> @shuffle_v4f64(<4 x double> %a) {
 define <4 x double> @shuffle_v4f64_same_lane(<4 x double> %a) {
 ; CHECK-LABEL: shuffle_v4f64_same_lane:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT:    xvld $xr1, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 78
-; CHECK-NEXT:    xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 75
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
   ret <4 x double> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
index c0fa734..2007f85 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
@@ -127,9 +127,7 @@ define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind {
 define <4 x i64> @byte_rotate_v4i64_3(<4 x i64> %a) nounwind {
 ; CHECK-LABEL: byte_rotate_v4i64_3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xvbsrl.v $xr1, $xr0, 8
-; CHECK-NEXT:    xvbsll.v $xr0, $xr0, 8
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 177
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
   ret <4 x i64> %shuffle
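
Reviewer aid (not part of the commit): the new lowerVECTOR_SHUFFLE_XVPERMI packs a 4-element shuffle mask into the xvpermi.d immediate two bits per element, element i at bits [2*i+1:2*i], with -1 (undef) lanes contributing nothing. A minimal standalone C++ sketch; xvpermiDImm is a hypothetical helper that mirrors the MaskImm loop from the patch, checked against the immediates in the updated tests:

// Hypothetical helper (not in the patch) mirroring the MaskImm loop in
// lowerVECTOR_SHUFFLE_XVPERMI: each mask element selects one 64-bit lane
// of the source and occupies two bits of the xvpermi.d immediate.
#include <cassert>
#include <cstdio>
#include <initializer_list>

static unsigned xvpermiDImm(std::initializer_list<int> Mask) {
  assert(Mask.size() == 4 && "xvpermi.d handles v4i64/v4f64 only");
  unsigned MaskImm = 0, i = 0;
  for (int M : Mask) {
    if (M != -1) // undef lanes leave their two bits as zero
      MaskImm |= unsigned(M) << (i * 2);
    ++i;
  }
  return MaskImm;
}

int main() {
  // <3,1,2,0> -> 39, as in the updated shuffle_v4i64 / shuffle_v4f64
  // CHECK lines (xvpermi.d $xr0, $xr0, 39).
  assert(xvpermiDImm({3, 1, 2, 0}) == 39);
  // <1,0,2,3> -> 225 (shuffle_v4i64_same_lane; also shuffle_v8i32_same_lane
  // once its pairwise i32 mask is widened to i64 lanes).
  assert(xvpermiDImm({1, 0, 2, 3}) == 225);
  // <3,2,0,1> -> 75 (shuffle_v4f64_same_lane).
  assert(xvpermiDImm({3, 2, 0, 1}) == 75);
  // <1,0,3,2> -> 177 (byte_rotate_v4i64_3).
  assert(xvpermiDImm({1, 0, 3, 2}) == 177);
  std::printf("immediates match the updated CHECK lines\n");
  return 0;
}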