From 770393bb99d947b908031f83e2d064b9666740e4 Mon Sep 17 00:00:00 2001
From: Pierre van Houtryve
Date: Mon, 17 Jun 2024 13:42:00 +0200
Subject: [MachineLICM] Correctly Apply Register Masks (#95746)

Fix regression introduced in d4b8b72
---
 llvm/lib/CodeGen/MachineLICM.cpp | 35 +++++++++++++----------------------
 1 file changed, 13 insertions(+), 22 deletions(-)

(limited to 'llvm/lib')

diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 6c5170e..1c76d72 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -426,38 +426,29 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
 static void applyBitsNotInRegMaskToRegUnitsMask(const TargetRegisterInfo &TRI,
                                                 BitVector &RUs,
                                                 const uint32_t *Mask) {
-  // Iterate over the RegMask raw to avoid constructing a BitVector, which is
-  // expensive as it implies dynamically allocating memory.
-  //
-  // We also work backwards.
+  BitVector ClobberedRUs(TRI.getNumRegUnits(), true);
   const unsigned NumRegs = TRI.getNumRegs();
   const unsigned MaskWords = (NumRegs + 31) / 32;
   for (unsigned K = 0; K < MaskWords; ++K) {
-    // We want to set the bits that aren't in RegMask, so flip it.
-    uint32_t Word = ~Mask[K];
-
-    // Iterate all set bits, starting from the right.
-    while (Word) {
-      const unsigned SetBitIdx = countr_zero(Word);
-
-      // The bits are numbered from the LSB in each word.
-      const unsigned PhysReg = (K * 32) + SetBitIdx;
-
-      // Clear the bit at SetBitIdx. Doing it this way appears to generate less
-      // instructions on x86. This works because negating a number will flip all
-      // the bits after SetBitIdx. So (Word & -Word) == (1 << SetBitIdx), but
-      // faster.
-      Word ^= Word & -Word;
+    const uint32_t Word = Mask[K];
+    if (!Word)
+      continue;
 
+    for (unsigned Bit = 0; Bit < 32; ++Bit) {
+      const unsigned PhysReg = (K * 32) + Bit;
       if (PhysReg == NumRegs)
-        return;
+        break;
 
-      if (PhysReg) {
+      // Check if we have a valid PhysReg that is set in the mask.
+      // FIXME: We shouldn't have to check for PhysReg.
+      if (PhysReg && ((Word >> Bit) & 1)) {
         for (MCRegUnitIterator RUI(PhysReg, &TRI); RUI.isValid(); ++RUI)
-          RUs.set(*RUI);
+          ClobberedRUs.reset(*RUI);
       }
     }
   }
+
+  RUs |= ClobberedRUs;
 }
 
 /// Examine the instruction for potentai LICM candidate. Also
-- 
cgit v1.1