aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Luke Lau <luke@igalia.com> 2024-02-22 11:50:27 +0800
committer llvmbot <60944935+llvmbot@users.noreply.github.com> 2024-03-19 13:57:52 -0700
commit a2c93b34dfdf6b2e5d16a5068e92f30bbc5d0ba7 (patch)
tree 968c2d1bb97f7cabb0afc6cafb40eeb6bae24248
parent a9d4ed71707d36bc554bfe38408c74c285b11e6b (diff)
downloadllvm-a2c93b34dfdf6b2e5d16a5068e92f30bbc5d0ba7.zip
llvm-a2c93b34dfdf6b2e5d16a5068e92f30bbc5d0ba7.tar.gz
llvm-a2c93b34dfdf6b2e5d16a5068e92f30bbc5d0ba7.tar.bz2
[RISCV] Fix mgather -> riscv.masked.strided.load combine not extending indices (#82506)
This fixes the miscompile reported in #82430 by telling isSimpleVIDSequence to sign extend to XLen instead of the width of the indices, since the "sequence" of indices generated by a strided load will be at XLen. This was the simplest way I could think of to get isSimpleVIDSequence to treat the indices as if they were zero extended to XLenVT. Another way we could do this is by refactoring the "get constant integers" part out of isSimpleVIDSequence and handling the constants as APInts so we can zero extend them separately. Fixes #82430 (cherry picked from commit 815644b4dd882ade2e5649d4f97c3dd6f7aea200)
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.cpp 20
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll 12
2 files changed, 16 insertions, 16 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 80447d0..a0cec42 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3192,7 +3192,8 @@ static std::optional<uint64_t> getExactInteger(const APFloat &APF,
// Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
// determine whether this is worth generating code for.
-static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
+static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
+ unsigned EltSizeInBits) {
unsigned NumElts = Op.getNumOperands();
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
bool IsInteger = Op.getValueType().isInteger();
@@ -3200,7 +3201,7 @@ static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
std::optional<unsigned> SeqStepDenom;
std::optional<int64_t> SeqStepNum, SeqAddend;
std::optional<std::pair<uint64_t, unsigned>> PrevElt;
- unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
+ assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
for (unsigned Idx = 0; Idx < NumElts; Idx++) {
// Assume undef elements match the sequence; we just have to be careful
// when interpolating across them.
@@ -3213,14 +3214,14 @@ static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
return std::nullopt;
Val = Op.getConstantOperandVal(Idx) &
- maskTrailingOnes<uint64_t>(EltSizeInBits);
+ maskTrailingOnes<uint64_t>(Op.getScalarValueSizeInBits());
} else {
// The BUILD_VECTOR must be all constants.
if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
return std::nullopt;
if (auto ExactInteger = getExactInteger(
cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
- EltSizeInBits))
+ Op.getScalarValueSizeInBits()))
Val = *ExactInteger;
else
return std::nullopt;
@@ -3276,11 +3277,11 @@ static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
uint64_t Val;
if (IsInteger) {
Val = Op.getConstantOperandVal(Idx) &
- maskTrailingOnes<uint64_t>(EltSizeInBits);
+ maskTrailingOnes<uint64_t>(Op.getScalarValueSizeInBits());
} else {
Val = *getExactInteger(
cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
- EltSizeInBits);
+ Op.getScalarValueSizeInBits());
}
uint64_t ExpectedVal =
(int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
@@ -3550,7 +3551,7 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
// Try and match index sequences, which we can lower to the vid instruction
// with optional modifications. An all-undef vector is matched by
// getSplatValue, above.
- if (auto SimpleVID = isSimpleVIDSequence(Op)) {
+ if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
int64_t StepNumerator = SimpleVID->StepNumerator;
unsigned StepDenominator = SimpleVID->StepDenominator;
int64_t Addend = SimpleVID->Addend;
@@ -15562,7 +15563,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (Index.getOpcode() == ISD::BUILD_VECTOR &&
MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
- if (std::optional<VIDSequence> SimpleVID = isSimpleVIDSequence(Index);
+ // The sequence will be XLenVT, not the type of Index. Tell
+ // isSimpleVIDSequence this so we avoid overflow.
+ if (std::optional<VIDSequence> SimpleVID =
+ isSimpleVIDSequence(Index, Subtarget.getXLen());
SimpleVID && SimpleVID->StepDenominator == 1) {
const int64_t StepNumerator = SimpleVID->StepNumerator;
const int64_t Addend = SimpleVID->Addend;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 60eec35..88c299a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -15086,23 +15086,19 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
ret <32 x i64> %x
}
-; FIXME: This is a miscompile triggered by the mgather ->
-; riscv.masked.strided.load combine. In order for it to trigger we need either a
-; strided gather that RISCVGatherScatterLowering doesn't pick up, or a new
-; strided gather generated by the widening sew combine.
define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) {
; RV32V-LABEL: masked_gather_widen_sew_negative_stride:
; RV32V: # %bb.0:
-; RV32V-NEXT: addi a0, a0, -120
-; RV32V-NEXT: li a1, 120
+; RV32V-NEXT: addi a0, a0, 136
+; RV32V-NEXT: li a1, -136
; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32V-NEXT: vlse64.v v8, (a0), a1
; RV32V-NEXT: ret
;
; RV64V-LABEL: masked_gather_widen_sew_negative_stride:
; RV64V: # %bb.0:
-; RV64V-NEXT: addi a0, a0, -120
-; RV64V-NEXT: li a1, 120
+; RV64V-NEXT: addi a0, a0, 136
+; RV64V-NEXT: li a1, -136
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vlse64.v v8, (a0), a1
; RV64V-NEXT: ret