| author    | Craig Topper <craig.topper@sifive.com>   | 2022-12-07 12:57:04 -0800 |
|-----------|-------------------------------------------|---------------------------|
| committer | Craig Topper <craig.topper@sifive.com>   | 2022-12-07 12:59:31 -0800 |
| commit    | 2c52d516dabe8edcb8d8a27444763d8b1186cd28 | |
| tree      | c26d9a833909f5237fe5c45f749d00b9cf6c65cd | |
| parent    | 90f60a6a737b397c49c56371f628e4b6440c00fd | |
Revert "[RISCV] Return InstSeq from generateInstSeqImpl instead of using an output parameter. NFC"
This reverts commit d24915207c631b7cf637081f333b41bc5159c700.
Thinking about this more, returning the sequence by value probably chewed up
100+ bytes of stack for each recursive call, so the change needs more thought.
The code simplification wasn't that significant.
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp | 73
1 file changed, 38 insertions(+), 35 deletions(-)
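The stack concern in the commit message can be illustrated with a minimal sketch. This is not taken from the LLVM sources: the types below are stand-ins that assume `RISCVMatInt::InstSeq` behaves like a small vector with inline storage for roughly eight (opcode, immediate) pairs, which is where a 100+ byte per-frame estimate would come from when the sequence is returned by value from a recursive function.

```cpp
// Hypothetical stand-ins, not the real RISCVMatInt types: assume InstSeq is a
// SmallVector-like container with inline storage for 8 (opcode, immediate)
// pairs, which is where the "100+ bytes per frame" estimate comes from.
#include <cstdint>
#include <cstdio>

struct Inst {
  unsigned Opc;
  int64_t Imm;
};

struct InstSeqLike {
  Inst Inline[8];     // ~128 bytes of inline storage per object
  unsigned Size = 0;
};

// Return-by-value style: every recursive frame owns its own InstSeqLike, so
// each level of recursion adds roughly sizeof(InstSeqLike) bytes to the stack.
InstSeqLike materializeByValue(uint64_t Val) {
  InstSeqLike Res;
  if (Val >> 32)
    Res = materializeByValue(Val >> 32); // callee frame holds another copy
  return Res;
}

// Output-parameter style (what the revert restores): the single Res object
// lives in the top-level caller; recursive frames only hold a reference.
void materializeByOutParam(uint64_t Val, InstSeqLike &Res) {
  if (Val >> 32)
    materializeByOutParam(Val >> 32, Res);
}

int main() {
  std::printf("per-frame cost of a local sequence: %zu bytes\n",
              sizeof(InstSeqLike));
}
```

With the output-parameter form, only the top-level caller owns the sequence storage, so recursion depth no longer multiplies it.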
```diff
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 8a8e17e..9e09396 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -45,9 +45,9 @@ static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
 }
 
 // Recursively generate a sequence for materializing an integer.
-static RISCVMatInt::InstSeq
-generateInstSeqImpl(int64_t Val, const FeatureBitset &ActiveFeatures) {
-  RISCVMatInt::InstSeq Res;
+static void generateInstSeqImpl(int64_t Val,
+                                const FeatureBitset &ActiveFeatures,
+                                RISCVMatInt::InstSeq &Res) {
   bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
 
   if (isInt<32>(Val)) {
@@ -68,7 +68,7 @@ generateInstSeqImpl(int64_t Val, const FeatureBitset &ActiveFeatures) {
       unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
       Res.emplace_back(AddiOpc, Lo12);
     }
-    return Res;
+    return;
   }
 
   assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");
@@ -76,7 +76,7 @@ generateInstSeqImpl(int64_t Val, const FeatureBitset &ActiveFeatures) {
   // Use BSETI for a single bit.
   if (ActiveFeatures[RISCV::FeatureStdExtZbs] && isPowerOf2_64(Val)) {
     Res.emplace_back(RISCV::BSETI, Log2_64(Val));
-    return Res;
+    return;
   }
 
   // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
@@ -140,7 +140,7 @@ generateInstSeqImpl(int64_t Val, const FeatureBitset &ActiveFeatures) {
     }
   }
 
-  Res = generateInstSeqImpl(Val, ActiveFeatures);
+  generateInstSeqImpl(Val, ActiveFeatures, Res);
 
   // Skip shift if we were able to use LUI directly.
   if (ShiftAmount) {
@@ -150,8 +150,6 @@ generateInstSeqImpl(int64_t Val, const FeatureBitset &ActiveFeatures) {
 
   if (Lo12)
     Res.emplace_back(RISCV::ADDI, Lo12);
-
-  return Res;
 }
 
 static unsigned extractRotateInfo(int64_t Val) {
@@ -174,7 +172,8 @@ static unsigned extractRotateInfo(int64_t Val) {
 
 namespace llvm::RISCVMatInt {
 InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
-  RISCVMatInt::InstSeq Res = generateInstSeqImpl(Val, ActiveFeatures);
+  RISCVMatInt::InstSeq Res;
+  generateInstSeqImpl(Val, ActiveFeatures, Res);
 
   // If the low 12 bits are non-zero, the first expansion may end with an ADDI
   // or ADDIW. If there are trailing zeros, try generating a sign extended
@@ -188,7 +187,8 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
     // code.
     bool IsShiftedCompressible =
         isInt<6>(ShiftedVal) && !ActiveFeatures[RISCV::TuneLUIADDIFusion];
-    auto TmpSeq = generateInstSeqImpl(ShiftedVal, ActiveFeatures);
+    RISCVMatInt::InstSeq TmpSeq;
+    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
     TmpSeq.emplace_back(RISCV::SLLI, TrailingZeros);
 
     // Keep the new sequence if it is an improvement.
@@ -203,37 +203,36 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
            "Expected RV32 to only need 2 instructions");
     unsigned LeadingZeros = countLeadingZeros((uint64_t)Val, ZB_Undefined);
     uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
+    // Fill in the bits that will be shifted out with 1s. An example where this
+    // helps is trailing one masks with 32 or more ones. This will generate
+    // ADDI -1 and an SRLI.
+    ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
 
-    {
-      // Fill in the bits that will be shifted out with 1s. An example where
-      // this helps is trailing one masks with 32 or more ones. This will
-      // generate ADDI -1 and an SRLI.
-      ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
-      auto TmpSeq = generateInstSeqImpl(ShiftedVal, ActiveFeatures);
-      TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+    RISCVMatInt::InstSeq TmpSeq;
+    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+    TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
 
-      // Keep the new sequence if it is an improvement.
-      if (TmpSeq.size() < Res.size())
-        Res = TmpSeq;
-    }
+    // Keep the new sequence if it is an improvement.
+    if (TmpSeq.size() < Res.size())
+      Res = TmpSeq;
 
-    {
-      // Some cases can benefit from filling the lower bits with zeros instead.
-      ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
-      auto TmpSeq = generateInstSeqImpl(ShiftedVal, ActiveFeatures);
-      TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+    // Some cases can benefit from filling the lower bits with zeros instead.
+    ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
+    TmpSeq.clear();
+    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+    TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
 
-      // Keep the new sequence if it is an improvement.
-      if (TmpSeq.size() < Res.size())
-        Res = TmpSeq;
-    }
+    // Keep the new sequence if it is an improvement.
+    if (TmpSeq.size() < Res.size())
+      Res = TmpSeq;
 
     // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
     // the end of the sequence.
     if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
       // Try replacing upper bits with 1.
       uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
-      auto TmpSeq = generateInstSeqImpl(LeadingOnesVal, ActiveFeatures);
+      TmpSeq.clear();
+      generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
       TmpSeq.emplace_back(RISCV::ADD_UW, 0);
 
       // Keep the new sequence if it is an improvement.
@@ -263,7 +262,8 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
       NewVal = Val & ~0x80000000ll;
     }
     if (isInt<32>(NewVal)) {
-      auto TmpSeq = generateInstSeqImpl(NewVal, ActiveFeatures);
+      RISCVMatInt::InstSeq TmpSeq;
+      generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq);
       TmpSeq.emplace_back(Opc, 31);
       if (TmpSeq.size() < Res.size())
         Res = TmpSeq;
@@ -275,7 +275,8 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
     int32_t Lo = Lo_32(Val);
     uint32_t Hi = Hi_32(Val);
     Opc = 0;
-    auto TmpSeq = generateInstSeqImpl(Lo, ActiveFeatures);
+    RISCVMatInt::InstSeq TmpSeq;
+    generateInstSeqImpl(Lo, ActiveFeatures, TmpSeq);
     // Check if it is profitable to use BCLRI/BSETI.
     if (Lo > 0 && TmpSeq.size() + countPopulation(Hi) < Res.size()) {
       Opc = RISCV::BSETI;
@@ -301,6 +302,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
            "Expected RV32 to only need 2 instructions");
     int64_t Div = 0;
     unsigned Opc = 0;
+    RISCVMatInt::InstSeq TmpSeq;
     // Select the opcode and divisor.
     if ((Val % 3) == 0 && isInt<32>(Val / 3)) {
       Div = 3;
@@ -314,7 +316,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
     }
     // Build the new instruction sequence.
    if (Div > 0) {
-      auto TmpSeq = generateInstSeqImpl(Val / Div, ActiveFeatures);
+      generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq);
       TmpSeq.emplace_back(Opc, 0);
       if (TmpSeq.size() < Res.size())
         Res = TmpSeq;
@@ -339,7 +341,8 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
       // already been processed to LUI+SH*ADD by previous optimization.
       assert(Lo12 != 0 &&
              "unexpected instruction sequence for immediate materialisation");
-      auto TmpSeq = generateInstSeqImpl(Hi52 / Div, ActiveFeatures);
+      assert(TmpSeq.empty() && "Expected empty TmpSeq");
+      generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
       TmpSeq.emplace_back(Opc, 0);
       TmpSeq.emplace_back(RISCV::ADDI, Lo12);
       if (TmpSeq.size() < Res.size())
```
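For context, the caller-side shape that the revert restores in generateInstSeq can be sketched as below. The container, helper names, and candidate expansions are hypothetical stand-ins, not the actual RISCVMatInt API; only the pattern of building one Res and reusing a single cleared TmpSeq per candidate mirrors the diff.

```cpp
// Hedged sketch of the caller-side pattern this revert restores; the types
// and helpers here are stand-ins, not the real RISCVMatInt code.
#include <cstdint>
#include <vector>

using Seq = std::vector<unsigned>; // stand-in for RISCVMatInt::InstSeq

// Stand-in for generateInstSeqImpl: fills the caller-provided output
// parameter rather than returning a sequence by value.
static void generateImpl(uint64_t Val, Seq &Out) {
  Out.push_back(static_cast<unsigned>(Val & 0xfff));
  if (Val >> 12)
    generateImpl(Val >> 12, Out); // the recursion shares the same Out
}

// Stand-in for generateInstSeq: builds the default candidate into Res, then
// reuses one scratch TmpSeq (cleared between attempts, as the diff does with
// TmpSeq.clear()) and keeps whichever candidate is shorter.
static Seq generate(uint64_t Val) {
  Seq Res;
  generateImpl(Val, Res);

  Seq TmpSeq;
  generateImpl(Val >> 1, TmpSeq); // hypothetical alternative expansion
  if (TmpSeq.size() < Res.size())
    Res = TmpSeq;

  TmpSeq.clear();                 // reuse the same scratch buffer
  generateImpl(Val >> 2, TmpSeq); // another hypothetical candidate
  if (TmpSeq.size() < Res.size())
    Res = TmpSeq;

  return Res;
}

int main() { return static_cast<int>(generate(0x12345678abcdULL).size()); }
```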