aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
author Pierre van Houtryve <pierre.vanhoutryve@amd.com> 2024-06-17 13:42:00 +0200
committer GitHub <noreply@github.com> 2024-06-17 13:42:00 +0200
commit 770393bb99d947b908031f83e2d064b9666740e4 (patch)
tree b53686389b1f0d46ff4757a4c0776d50c49f1c57 /llvm/lib/CodeGen
parent c2d9f253e5a4074bb965e483cca2fe968b78693c (diff)
download llvm-770393bb99d947b908031f83e2d064b9666740e4.zip
llvm-770393bb99d947b908031f83e2d064b9666740e4.tar.gz
llvm-770393bb99d947b908031f83e2d064b9666740e4.tar.bz2
[MachineLICM] Correctly Apply Register Masks (#95746)
Fix regression introduced in d4b8b72
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- llvm/lib/CodeGen/MachineLICM.cpp 35
1 files changed, 13 insertions, 22 deletions
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 6c5170e..1c76d72 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -426,38 +426,29 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
static void applyBitsNotInRegMaskToRegUnitsMask(const TargetRegisterInfo &TRI,
BitVector &RUs,
const uint32_t *Mask) {
- // Iterate over the RegMask raw to avoid constructing a BitVector, which is
- // expensive as it implies dynamically allocating memory.
- //
- // We also work backwards.
+ BitVector ClobberedRUs(TRI.getNumRegUnits(), true);
const unsigned NumRegs = TRI.getNumRegs();
const unsigned MaskWords = (NumRegs + 31) / 32;
for (unsigned K = 0; K < MaskWords; ++K) {
- // We want to set the bits that aren't in RegMask, so flip it.
- uint32_t Word = ~Mask[K];
-
- // Iterate all set bits, starting from the right.
- while (Word) {
- const unsigned SetBitIdx = countr_zero(Word);
-
- // The bits are numbered from the LSB in each word.
- const unsigned PhysReg = (K * 32) + SetBitIdx;
-
- // Clear the bit at SetBitIdx. Doing it this way appears to generate less
- // instructions on x86. This works because negating a number will flip all
- // the bits after SetBitIdx. So (Word & -Word) == (1 << SetBitIdx), but
- // faster.
- Word ^= Word & -Word;
+ const uint32_t Word = Mask[K];
+ if (!Word)
+ continue;
+ for (unsigned Bit = 0; Bit < 32; ++Bit) {
+ const unsigned PhysReg = (K * 32) + Bit;
if (PhysReg == NumRegs)
- return;
+ break;
- if (PhysReg) {
+ // Check if we have a valid PhysReg that is set in the mask.
+ // FIXME: We shouldn't have to check for PhysReg.
+ if (PhysReg && ((Word >> Bit) & 1)) {
for (MCRegUnitIterator RUI(PhysReg, &TRI); RUI.isValid(); ++RUI)
- RUs.set(*RUI);
+ ClobberedRUs.reset(*RUI);
}
}
}
+
+ RUs |= ClobberedRUs;
}
/// Examine the instruction for potential LICM candidate. Also