diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 34 | ||||
-rw-r--r-- | llvm/test/CodeGen/LoongArch/merge-offset-option.ll | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-mask-op.ll | 6 |
5 files changed, 67 insertions, 6 deletions
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index d0a8aba..c5e26c1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -57,6 +57,11 @@ static cl::opt<bool> cl::desc("Enable the loop data prefetch pass"), cl::init(false)); +static cl::opt<bool> + EnableMergeBaseOffset("loongarch-enable-merge-offset", + cl::desc("Enable the merge base offset pass"), + cl::init(true), cl::Hidden); + static Reloc::Model getEffectiveRelocModel(const Triple &TT, std::optional<Reloc::Model> RM) { return RM.value_or(Reloc::Static); @@ -214,7 +219,7 @@ void LoongArchPassConfig::addMachineSSAOptimization() { void LoongArchPassConfig::addPreRegAlloc() { addPass(createLoongArchPreRAExpandPseudoPass()); - if (TM->getOptLevel() != CodeGenOptLevel::None) + if (TM->getOptLevel() != CodeGenOptLevel::None && EnableMergeBaseOffset) addPass(createLoongArchMergeBaseOffsetOptPass()); } diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index b8f2999..2371ed4 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3238,6 +3238,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, (_.VT _.RC:$src1), (_.VT _.RC:$src0))))], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.RR]>; + let mayLoad = 1, canFoldAsLoad = 1 in def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1), !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|", @@ -3248,6 +3249,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name, (_.VT _.RC:$src0))))], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.RM]>; } + let mayLoad = 1, canFoldAsLoad = 1 in def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.MemOp:$src), OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"# diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 03ac1d3..1d2cd39 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -8113,6 +8113,39 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, LiveIntervals *LIS) const { + // If LoadMI is a masked load, check MI having the same mask. + const MCInstrDesc &MCID = get(LoadMI.getOpcode()); + unsigned NumOps = MCID.getNumOperands(); + if (NumOps >= 3) { + Register MaskReg; + const MachineOperand &Op1 = LoadMI.getOperand(1); + const MachineOperand &Op2 = LoadMI.getOperand(2); + + auto IsVKWMClass = [](const TargetRegisterClass *RC) { + return RC == &X86::VK2WMRegClass || RC == &X86::VK4WMRegClass || + RC == &X86::VK8WMRegClass || RC == &X86::VK16WMRegClass || + RC == &X86::VK32WMRegClass || RC == &X86::VK64WMRegClass; + }; + + if (Op1.isReg() && IsVKWMClass(getRegClass(MCID, 1, &RI))) + MaskReg = Op1.getReg(); + else if (Op2.isReg() && IsVKWMClass(getRegClass(MCID, 2, &RI))) + MaskReg = Op2.getReg(); + + if (MaskReg) { + bool HasSameMask = false; + for (unsigned I = 1, E = MI.getDesc().getNumOperands(); I < E; ++I) { + const MachineOperand &Op = MI.getOperand(I); + if (Op.isReg() && Op.getReg() == MaskReg) { + HasSameMask = true; + break; + } + } + if (!HasSameMask) + return nullptr; + } + } + // TODO: Support the case where LoadMI loads a wide register, but MI // only uses a subreg. for (auto Op : Ops) { @@ -8121,7 +8154,6 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( } // If loading from a FrameIndex, fold directly from the FrameIndex. - unsigned NumOps = LoadMI.getDesc().getNumOperands(); int FrameIndex; if (isLoadFromStackSlot(LoadMI, FrameIndex)) { if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF)) diff --git a/llvm/test/CodeGen/LoongArch/merge-offset-option.ll b/llvm/test/CodeGen/LoongArch/merge-offset-option.ll new file mode 100644 index 0000000..e5351a6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/merge-offset-option.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 -mattr=+d --relocation-model=static -O1 \ +; RUN: < %s | FileCheck %s --check-prefix=MERGE +; RUN: llc --mtriple=loongarch64 -mattr=+d --relocation-model=static -O1 \ +; RUN: --loongarch-enable-merge-offset=false < %s | FileCheck %s --check-prefix=NO_MERGE + +@g = dso_local global i32 zeroinitializer, align 4 + +define void @foo() nounwind { +; MERGE-LABEL: foo: +; MERGE: # %bb.0: +; MERGE-NEXT: pcalau12i $a0, %pc_hi20(g) +; MERGE-NEXT: ld.w $zero, $a0, %pc_lo12(g) +; MERGE-NEXT: ret +; +; NO_MERGE-LABEL: foo: +; NO_MERGE: # %bb.0: +; NO_MERGE-NEXT: pcalau12i $a0, %pc_hi20(g) +; NO_MERGE-NEXT: addi.d $a0, $a0, %pc_lo12(g) +; NO_MERGE-NEXT: ld.w $zero, $a0, 0 +; NO_MERGE-NEXT: ret + %v = load volatile i32, ptr @g + ret void +} diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 8aa898f..da0cef0 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -2119,8 +2119,7 @@ define void @ktest_1(<8 x double> %in, ptr %base) { ; KNL-LABEL: ktest_1: ; KNL: ## %bb.0: ; KNL-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 -; KNL-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} -; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} +; KNL-NEXT: vcmpltpd 8(%rdi), %zmm0, %k0 {%k1} ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testb %al, %al ; KNL-NEXT: je LBB44_2 @@ -2152,8 +2151,7 @@ define void @ktest_1(<8 x double> %in, ptr %base) { ; AVX512BW-LABEL: ktest_1: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: vcmpgtpd (%rdi), %zmm0, %k1 -; AVX512BW-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} -; AVX512BW-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} +; AVX512BW-NEXT: vcmpltpd 8(%rdi), %zmm0, %k0 {%k1} ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: testb %al, %al ; AVX512BW-NEXT: je LBB44_2 |