Diffstat (limited to 'llvm')
 llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp |  7 ++++++-
 llvm/lib/Target/X86/X86InstrAVX512.td                |  2 ++
 llvm/lib/Target/X86/X86InstrInfo.cpp                 | 34 ++++++++++++++++++++-
 llvm/test/CodeGen/LoongArch/merge-offset-option.ll   | 24 ++++++++++++++++++++
 llvm/test/CodeGen/X86/avx512-mask-op.ll              |  6 ++----
 5 files changed, 67 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index d0a8aba..c5e26c1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -57,6 +57,11 @@ static cl::opt<bool>
                            cl::desc("Enable the loop data prefetch pass"),
                            cl::init(false));
 
+static cl::opt<bool>
+    EnableMergeBaseOffset("loongarch-enable-merge-offset",
+                          cl::desc("Enable the merge base offset pass"),
+                          cl::init(true), cl::Hidden);
+
 static Reloc::Model getEffectiveRelocModel(const Triple &TT,
                                            std::optional<Reloc::Model> RM) {
   return RM.value_or(Reloc::Static);
@@ -214,7 +219,7 @@ void LoongArchPassConfig::addMachineSSAOptimization() {
 
 void LoongArchPassConfig::addPreRegAlloc() {
   addPass(createLoongArchPreRAExpandPseudoPass());
-  if (TM->getOptLevel() != CodeGenOptLevel::None)
+  if (TM->getOptLevel() != CodeGenOptLevel::None && EnableMergeBaseOffset)
     addPass(createLoongArchMergeBaseOffsetOptPass());
 }
 
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b8f2999..2371ed4 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3238,6 +3238,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                                (_.VT _.RC:$src1),
                                (_.VT _.RC:$src0))))], _.ExeDomain>,
                     EVEX, EVEX_K, Sched<[Sched.RR]>;
+  let mayLoad = 1, canFoldAsLoad = 1 in
   def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                      (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
@@ -3248,6 +3249,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                                (_.VT _.RC:$src0))))], _.ExeDomain>,
                     EVEX, EVEX_K, Sched<[Sched.RM]>;
   }
+  let mayLoad = 1, canFoldAsLoad = 1 in
   def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                       (ins _.KRCWM:$mask, _.MemOp:$src),
                       OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
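
With mayLoad and canFoldAsLoad set, the masked rmk/rmkz variants become
eligible candidates for load folding. A minimal C++ sketch of the gate the
generic peephole pass applies before offering a load to the target's folder
(an approximation of the in-tree logic, not the verbatim code):

  #include "llvm/CodeGen/MachineInstr.h"

  // Sketch: only instructions that both read memory and are explicitly
  // flagged as safe to fold are ever considered as LoadMI candidates.
  static bool isLoadFoldCandidate(const llvm::MachineInstr &MI) {
    return MI.canFoldAsLoad() && MI.mayLoad();
  }

Without the two flags, the masked load variants were never treated as
foldable load candidates, so they never reached
X86InstrInfo::foldMemoryOperandImpl in the first place.
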
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 03ac1d3..1d2cd39 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -8113,6 +8113,39 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
     MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
     LiveIntervals *LIS) const {
+  // If LoadMI is a masked load, check that MI uses the same mask register.
+  const MCInstrDesc &MCID = get(LoadMI.getOpcode());
+  unsigned NumOps = MCID.getNumOperands();
+  if (NumOps >= 3) {
+    Register MaskReg;
+    const MachineOperand &Op1 = LoadMI.getOperand(1);
+    const MachineOperand &Op2 = LoadMI.getOperand(2);
+
+    auto IsVKWMClass = [](const TargetRegisterClass *RC) {
+      return RC == &X86::VK2WMRegClass || RC == &X86::VK4WMRegClass ||
+             RC == &X86::VK8WMRegClass || RC == &X86::VK16WMRegClass ||
+             RC == &X86::VK32WMRegClass || RC == &X86::VK64WMRegClass;
+    };
+
+    if (Op1.isReg() && IsVKWMClass(getRegClass(MCID, 1, &RI)))
+      MaskReg = Op1.getReg();
+    else if (Op2.isReg() && IsVKWMClass(getRegClass(MCID, 2, &RI)))
+      MaskReg = Op2.getReg();
+
+    if (MaskReg) {
+      bool HasSameMask = false;
+      for (unsigned I = 1, E = MI.getDesc().getNumOperands(); I < E; ++I) {
+        const MachineOperand &Op = MI.getOperand(I);
+        if (Op.isReg() && Op.getReg() == MaskReg) {
+          HasSameMask = true;
+          break;
+        }
+      }
+      if (!HasSameMask)
+        return nullptr;
+    }
+  }
+
   // TODO: Support the case where LoadMI loads a wide register, but MI
   // only uses a subreg.
   for (auto Op : Ops) {
@@ -8121,7 +8154,6 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
   }
 
   // If loading from a FrameIndex, fold directly from the FrameIndex.
-  unsigned NumOps = LoadMI.getDesc().getNumOperands();
   int FrameIndex;
   if (isLoadFromStackSlot(LoadMI, FrameIndex)) {
     if (isNonFoldablePartialRegisterLoad(LoadMI, MI, MF))
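
The guard captures the safety condition: folding a masked load into a
consumer discards the load's masking, which is only harmless when the
consumer is predicated by the identical mask register, so the masked-off
lanes can neither fault nor leak into the result. A hypothetical standalone
restatement of the check (the helper name is illustrative, not an in-tree
function):

  #include "llvm/CodeGen/MachineInstr.h"

  using namespace llvm;

  // A masked LoadMI may fold into MI only if MI reads the same VK*WM mask.
  static bool usesSameMask(const MachineInstr &MI, Register MaskReg) {
    for (const MachineOperand &Op : MI.operands())
      if (Op.isReg() && Op.getReg() == MaskReg)
        return true;  // consumer is predicated by the identical mask
    return false;     // folding would drop the load's masking: reject
  }
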
diff --git a/llvm/test/CodeGen/LoongArch/merge-offset-option.ll b/llvm/test/CodeGen/LoongArch/merge-offset-option.ll
new file mode 100644
index 0000000..e5351a6
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/merge-offset-option.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 -mattr=+d --relocation-model=static -O1 \
+; RUN: < %s | FileCheck %s --check-prefix=MERGE
+; RUN: llc --mtriple=loongarch64 -mattr=+d --relocation-model=static -O1 \
+; RUN: --loongarch-enable-merge-offset=false < %s | FileCheck %s --check-prefix=NO_MERGE
+
+@g = dso_local global i32 zeroinitializer, align 4
+
+define void @foo() nounwind {
+; MERGE-LABEL: foo:
+; MERGE:       # %bb.0:
+; MERGE-NEXT:    pcalau12i $a0, %pc_hi20(g)
+; MERGE-NEXT:    ld.w $zero, $a0, %pc_lo12(g)
+; MERGE-NEXT:    ret
+;
+; NO_MERGE-LABEL: foo:
+; NO_MERGE:       # %bb.0:
+; NO_MERGE-NEXT:    pcalau12i $a0, %pc_hi20(g)
+; NO_MERGE-NEXT:    addi.d $a0, $a0, %pc_lo12(g)
+; NO_MERGE-NEXT:    ld.w $zero, $a0, 0
+; NO_MERGE-NEXT:    ret
+  %v = load volatile i32, ptr @g
+  ret void
+}
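
The test pins down both pipeline shapes: with the pass enabled, the
%pc_lo12 offset folds into the load's displacement and the addi.d
disappears. A comment-only C sketch of the address arithmetic
(page_address and pageoff are illustrative names for what %pc_hi20 and
%pc_lo12 denote):

  // Without merging (NO_MERGE):
  //   a0  = page_address(g);               // pcalau12i %pc_hi20(g)
  //   a0  = a0 + pageoff(g);               // addi.d    %pc_lo12(g)
  //   val = *(int32_t *)(a0 + 0);          // ld.w      0
  // With merging (MERGE), the add folds into the load's displacement:
  //   a0  = page_address(g);               // pcalau12i %pc_hi20(g)
  //   val = *(int32_t *)(a0 + pageoff(g)); // ld.w      %pc_lo12(g)
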
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 8aa898f..da0cef0 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -2119,8 +2119,7 @@ define void @ktest_1(<8 x double> %in, ptr %base) {
 ; KNL-LABEL: ktest_1:
 ; KNL:       ## %bb.0:
 ; KNL-NEXT:    vcmpgtpd (%rdi), %zmm0, %k1
-; KNL-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
-; KNL-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
+; KNL-NEXT:    vcmpltpd 8(%rdi), %zmm0, %k0 {%k1}
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    testb %al, %al
 ; KNL-NEXT:    je LBB44_2
@@ -2152,8 +2151,7 @@ define void @ktest_1(<8 x double> %in, ptr %base) {
 ; AVX512BW-LABEL: ktest_1:
 ; AVX512BW:       ## %bb.0:
 ; AVX512BW-NEXT:    vcmpgtpd (%rdi), %zmm0, %k1
-; AVX512BW-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
-; AVX512BW-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT:    vcmpltpd 8(%rdi), %zmm0, %k0 {%k1}
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    testb %al, %al
 ; AVX512BW-NEXT:    je LBB44_2
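
Both updated check blocks show the payoff: the zero-masked vmovupd feeding
the second compare carried the same %k1 predicate as the compare itself, so
the load now folds straight into vcmpltpd's memory operand, saving an
instruction and a vector register. Roughly the same pattern in AVX-512
intrinsics (a sketch approximating the IR in ktest_1, assuming base points
at at least nine readable doubles):

  #include <immintrin.h>

  __mmask8 ktest_pattern(__m512d v, const double *base) {
    // k1 = (v > base[0..7]), an unmasked memory-operand compare.
    __mmask8 k1 = _mm512_cmp_pd_mask(v, _mm512_loadu_pd(base), _CMP_GT_OS);
    // The second compare is predicated by the same k1, so the masked load
    // that used to feed it can legally become vcmpltpd's memory operand.
    return _mm512_mask_cmp_pd_mask(k1, v, _mm512_loadu_pd(base + 1),
                                   _CMP_LT_OS);
  }
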