aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
diff options
context:
space:
mode:
author: Stefan Pintilie <stefanp@ca.ibm.com> 2022-08-16 11:08:33 -0500
committer: Stefan Pintilie <stefanp@ca.ibm.com> 2022-08-19 07:05:40 -0500
commit: 1492c88f494cb09de0ebc7fb77a84c41d0aa93ce (patch)
tree: 0a0dd9b55564d3f7043e48d3ea1b97457081e182 /llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
parent: 9f21d6e953e559d695b1eb372976533eccba2b06 (diff)
download: llvm-1492c88f494cb09de0ebc7fb77a84c41d0aa93ce.zip
llvm-1492c88f494cb09de0ebc7fb77a84c41d0aa93ce.tar.gz
llvm-1492c88f494cb09de0ebc7fb77a84c41d0aa93ce.tar.bz2
[PowerPC] Fix bugs in sign-/zero-extension elimination
This patch fixes the following two bugs in the `PPCInstrInfo::isSignOrZeroExtended` helper, which is used by the sign-/zero-extension elimination in the PPCMIPeephole pass:
- Registers defined by a load with update (e.g. LBZU) were identified as already sign- or zero-extended. That is true only for the first def (the loaded value), not for the second def (i.e. the updated pointer).
- Registers defined by ORIS/XORIS were identified as already sign-extended. For sign extension, however, whether that holds depends on the immediate (while it is always fine for zero extension).

To handle the first case, the parameter of the helpers is changed from a `MachineInstr` to a register number, so that the first and second defs can be distinguished. This patch also moves the initialization of PPCMIPeepholePass to allow a MIR test case.

Reviewed By: nemanjai
Differential Revision: https://reviews.llvm.org/D40554
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCMIPeephole.cpp')
-rw-r--r-- llvm/lib/Target/PowerPC/PPCMIPeephole.cpp 45
1 files changed, 36 insertions, 9 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index c9d0645..6788bd85 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -73,12 +73,12 @@ ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
static cl::opt<bool>
EnableSExtElimination("ppc-eliminate-signext",
cl::desc("enable elimination of sign-extensions"),
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
static cl::opt<bool>
EnableZExtElimination("ppc-eliminate-zeroext",
cl::desc("enable elimination of zero-extensions"),
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
static cl::opt<bool>
EnableTrapOptimization("ppc-opt-conditional-trap",
@@ -172,8 +172,10 @@ static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
// This function returns number of known zero bits in output of MI
// starting from the most significant bit.
-static unsigned
-getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
+static unsigned getKnownLeadingZeroCount(const unsigned Reg,
+ const PPCInstrInfo *TII,
+ const MachineRegisterInfo *MRI) {
+ MachineInstr *MI = MRI->getVRegDef(Reg);
unsigned Opcode = MI->getOpcode();
if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
@@ -217,7 +219,7 @@ getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
return 56;
- if (TII->isZeroExtended(*MI))
+ if (TII->isZeroExtended(Reg, MRI))
return 32;
return 0;
@@ -782,8 +784,8 @@ bool PPCMIPeephole::simplifyCode() {
SrcMI->getOpcode() == PPC::LHZX) {
if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
break;
- auto is64Bit = [] (unsigned Opcode) {
- return Opcode == PPC::EXTSH8;
+ auto is64Bit = [](unsigned Opcode) {
+ return Opcode == PPC::EXTSH8 || Opcode == PPC::EXTSH8_32_64;
};
auto isXForm = [] (unsigned Opcode) {
return Opcode == PPC::LHZX;
@@ -798,6 +800,7 @@ bool PPCMIPeephole::simplifyCode() {
};
unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
isXForm(SrcMI->getOpcode()));
+
LLVM_DEBUG(dbgs() << "Zero-extending load\n");
LLVM_DEBUG(SrcMI->dump());
LLVM_DEBUG(dbgs() << "and sign-extension\n");
@@ -840,8 +843,29 @@ bool PPCMIPeephole::simplifyCode() {
if (isXForm) return PPC::LWAX_32;
else return PPC::LWA_32;
};
+
+ // The transformation from a zero-extending load to a sign-extending
+ // load is only legal when the displacement is a multiple of 4.
+ // If the displacement is not at least 4 byte aligned, don't perform
+ // the transformation.
+ bool IsWordAligned = false;
+ if (SrcMI->getOperand(1).isGlobal()) {
+ const GlobalObject *GO =
+ dyn_cast<GlobalObject>(SrcMI->getOperand(1).getGlobal());
+ if (GO && GO->getAlignment() >= 4)
+ IsWordAligned = true;
+ } else if (SrcMI->getOperand(1).isImm()) {
+ int64_t Value = SrcMI->getOperand(1).getImm();
+ if (Value % 4 == 0)
+ IsWordAligned = true;
+ }
+
unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
isXForm(SrcMI->getOpcode()));
+
+ if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
+ break;
+
LLVM_DEBUG(dbgs() << "Zero-extending load\n");
LLVM_DEBUG(SrcMI->dump());
LLVM_DEBUG(dbgs() << "and sign-extension\n");
@@ -853,7 +877,7 @@ bool PPCMIPeephole::simplifyCode() {
Simplified = true;
NumEliminatedSExt++;
} else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
- TII->isSignExtended(*SrcMI)) {
+ TII->isSignExtended(NarrowReg, MRI)) {
// We can eliminate EXTSW if the input is known to be already
// sign-extended.
LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
@@ -904,8 +928,11 @@ bool PPCMIPeephole::simplifyCode() {
if (Register::isVirtualRegister(CopyReg))
SrcMI = MRI->getVRegDef(CopyReg);
}
+ if (!SrcMI->getOperand(0).isReg())
+ break;
- unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII);
+ unsigned KnownZeroCount =
+ getKnownLeadingZeroCount(SrcMI->getOperand(0).getReg(), TII, MRI);
if (MI.getOperand(3).getImm() <= KnownZeroCount) {
LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),