diff options
author | zhijian lin <zhijian@ca.ibm.com> | 2025-08-07 13:13:56 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-08-07 13:13:56 -0400 |
commit | 093439c688db8d176081176576011275a1aecf23 (patch) | |
tree | b7126682d14d289bdbb40725c707724f711df2d3 | |
parent | d97f0e93642722380be9ed190c17ea895817c339 (diff) | |
download | llvm-093439c688db8d176081176576011275a1aecf23.zip llvm-093439c688db8d176081176576011275a1aecf23.tar.gz llvm-093439c688db8d176081176576011275a1aecf23.tar.bz2 |
[PowerPC][AIX] Using millicode for memcmp instead of libcall (#147093)
AIX has "millicode" routines, which are functions loaded at boot time
into fixed addresses in kernel memory. This allows them to be customized
for the processor. The ___memcmp routine (___memcmp64 in 64-bit mode) is
a millicode implementation of memcmp; we call it instead of the memcmp
library function to improve performance.
-rw-r--r-- | llvm/include/llvm/CodeGen/SelectionDAG.h | 3 | ||||
-rw-r--r-- | llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h | 4 | ||||
-rw-r--r-- | llvm/include/llvm/IR/RuntimeLibcalls.td | 3 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 38 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h | 5 | ||||
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/memintr32.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/memintr64.ll | 2 |
11 files changed, 62 insertions, 9 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 6f2ad33..3275e32 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1260,6 +1260,9 @@ public: /// stack arguments from being clobbered. LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain); + std::pair<SDValue, SDValue> getMemcmp(SDValue Chain, const SDLoc &dl, + SDValue Dst, SDValue Src, SDValue Size, + const CallInst *CI); /* \p CI if not null is the memset call being lowered. * \p OverrideTailCall is an optional parameter that can be used to override * the tail call optimization decision. */ diff --git a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h index 463f0ec..fd00f81 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h @@ -23,6 +23,7 @@ namespace llvm { +class CallInst; class SelectionDAG; //===----------------------------------------------------------------------===// @@ -118,8 +119,7 @@ public: virtual std::pair<SDValue, SDValue> EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, - MachinePointerInfo Op1PtrInfo, - MachinePointerInfo Op2PtrInfo) const { + const CallInst *CI) const { return std::make_pair(SDValue(), SDValue()); } diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index df472d4..eadf3ea 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -276,6 +276,7 @@ foreach FPTy = ["F32", "F64", "F128", "PPCF128"] in { } // Memory +def MEMCMP : RuntimeLibcall; def MEMCPY : RuntimeLibcall; def MEMMOVE : RuntimeLibcall; def MEMSET : RuntimeLibcall; @@ -1990,12 +1991,14 @@ defset list<RuntimeLibcallImpl> PPCRuntimeLibcalls = { } defset list<RuntimeLibcallImpl> PPC64AIXCallList = { + def ___memcmp64 : 
RuntimeLibcallImpl<MEMCMP>; def ___memmove64 : RuntimeLibcallImpl<MEMCPY>; def ___memset64 : RuntimeLibcallImpl<MEMSET>; def ___bzero64 : RuntimeLibcallImpl<BZERO>; } defset list<RuntimeLibcallImpl> PPC32AIXCallList = { + def ___memcmp : RuntimeLibcallImpl<MEMCMP>; def ___memmove : RuntimeLibcallImpl<MEMMOVE>; def ___memset : RuntimeLibcallImpl<MEMSET>; def ___bzero : RuntimeLibcallImpl<BZERO>; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index bfa72bf..b9e72c9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8889,6 +8889,44 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, } } +std::pair<SDValue, SDValue> +SelectionDAG::getMemcmp(SDValue Chain, const SDLoc &dl, SDValue Mem0, + SDValue Mem1, SDValue Size, const CallInst *CI) { + const char *LibCallName = TLI->getLibcallName(RTLIB::MEMCMP); + if (!LibCallName) + return {}; + + // Emit a library call. 
+ auto GetEntry = [](Type *Ty, SDValue &SDV) { + TargetLowering::ArgListEntry E; + E.Ty = Ty; + E.Node = SDV; + return E; + }; + + PointerType *PT = PointerType::getUnqual(*getContext()); + TargetLowering::ArgListTy Args = { + GetEntry(PT, Mem0), GetEntry(PT, Mem1), + GetEntry(getDataLayout().getIntPtrType(*getContext()), Size)}; + + TargetLowering::CallLoweringInfo CLI(*this); + bool IsTailCall = false; + bool ReturnsFirstArg = CI && funcReturnsFirstArgOfCall(*CI); + IsTailCall = CI && CI->isTailCall() && + isInTailCallPosition(*CI, getTarget(), ReturnsFirstArg); + + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee( + TLI->getLibcallCallingConv(RTLIB::MEMCMP), + Type::getInt32Ty(*getContext()), + getExternalSymbol(LibCallName, TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setTailCall(IsTailCall); + + return TLI->LowerCallTo(CLI); +} + SDValue SelectionDAG::getMemcpy( SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 868e2f4..f5f5c14 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9090,7 +9090,7 @@ bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) { const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp( DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS), - getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS)); + getValue(Size), &I); if (Res.first.getNode()) { processIntegerCallValue(I, Res.first, true); PendingLoads.push_back(Res.second); diff --git a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp index 95de9f3..4039fed 100644 --- a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp +++ 
b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp @@ -22,3 +22,9 @@ bool PPCSelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const { return Opcode >= PPCISD::FIRST_STRICTFP_OPCODE && Opcode <= PPCISD::LAST_STRICTFP_OPCODE; } + +std::pair<SDValue, SDValue> PPCSelectionDAGInfo::EmitTargetCodeForMemcmp( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, + SDValue Op3, const CallInst *CI) const { + return DAG.getMemcmp(Chain, dl, Op1, Op2, Op3, CI); +} diff --git a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h index 08e2ddb..5635c6a 100644 --- a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h +++ b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h @@ -20,6 +20,11 @@ public: bool isTargetMemoryOpcode(unsigned Opcode) const override; bool isTargetStrictFPOpcode(unsigned Opcode) const override; + + std::pair<SDValue, SDValue> + EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, + SDValue Op1, SDValue Op2, SDValue Op3, + const CallInst *CI) const; }; } // namespace llvm diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index d76babe..afe838a 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -181,8 +181,7 @@ static SDValue addIPMSequence(const SDLoc &DL, SDValue CCReg, std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, - SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo, - MachinePointerInfo Op2PtrInfo) const { + SDValue Src2, SDValue Size, const CallInst *CI) const { SDValue CCReg; // Swap operands to invert CC == 1 vs. CC == 2 cases. 
if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index c928f34..5a1e0cd 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -41,8 +41,7 @@ public: std::pair<SDValue, SDValue> EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, SDValue Src2, SDValue Size, - MachinePointerInfo Op1PtrInfo, - MachinePointerInfo Op2PtrInfo) const override; + const CallInst *CI) const override; std::pair<SDValue, SDValue> EmitTargetCodeForMemchr(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, diff --git a/llvm/test/CodeGen/PowerPC/memintr32.ll b/llvm/test/CodeGen/PowerPC/memintr32.ll index c07a5af..4f0a996 100644 --- a/llvm/test/CodeGen/PowerPC/memintr32.ll +++ b/llvm/test/CodeGen/PowerPC/memintr32.ll @@ -11,7 +11,7 @@ define i32 @memcmp_test(ptr nocapture noundef readonly %ptr1, ptr nocapture noun ; CHECK-AIX-32-P9-NEXT: mflr r0 ; CHECK-AIX-32-P9-NEXT: stwu r1, -64(r1) ; CHECK-AIX-32-P9-NEXT: stw r0, 72(r1) -; CHECK-AIX-32-P9-NEXT: bl .memcmp[PR] +; CHECK-AIX-32-P9-NEXT: bl .___memcmp[PR] ; CHECK-AIX-32-P9-NEXT: nop ; CHECK-AIX-32-P9-NEXT: addi r1, r1, 64 ; CHECK-AIX-32-P9-NEXT: lwz r0, 8(r1) diff --git a/llvm/test/CodeGen/PowerPC/memintr64.ll b/llvm/test/CodeGen/PowerPC/memintr64.ll index b3a6650..0b0e556 100644 --- a/llvm/test/CodeGen/PowerPC/memintr64.ll +++ b/llvm/test/CodeGen/PowerPC/memintr64.ll @@ -39,7 +39,7 @@ define noundef i32 @_Z11memcmp_testPKvS0_m(ptr noundef readonly captures(none) % ; CHECK-AIX-64-P9-NEXT: mflr r0 ; CHECK-AIX-64-P9-NEXT: stdu r1, -112(r1) ; CHECK-AIX-64-P9-NEXT: std r0, 128(r1) -; CHECK-AIX-64-P9-NEXT: bl .memcmp[PR] +; CHECK-AIX-64-P9-NEXT: bl .___memcmp64[PR] ; CHECK-AIX-64-P9-NEXT: nop ; CHECK-AIX-64-P9-NEXT: addi r1, r1, 112 ; CHECK-AIX-64-P9-NEXT: ld r0, 16(r1) |