diff options
author | Kirill Bobyrev <kbobyrev@google.com> | 2020-02-27 11:58:32 +0100 |
---|---|---|
committer | Kirill Bobyrev <kbobyrev@google.com> | 2020-02-27 11:58:32 +0100 |
commit | 014728413f354c8f44375074b331e68ce194bbd2 (patch) | |
tree | 508cc11eb227779134f668340fee8c279dbc092a | |
parent | 58d9e8194e4e4ed18175400792030920a3bb7636 (diff) | |
download | llvm-014728413f354c8f44375074b331e68ce194bbd2.zip llvm-014728413f354c8f44375074b331e68ce194bbd2.tar.gz llvm-014728413f354c8f44375074b331e68ce194bbd2.tar.bz2 |
Revert "[Hexagon] Improve casting of boolean HVX vectors to scalars"
This reverts commit 7691790dfd1011d08f5468f63952d7690755aad4.
The patch is failing tests with MSAN:
http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fast/builds/39054/steps/check-llvm%20msan/logs/stdio
-rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 35 | ||||
-rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLowering.h | 19 | ||||
-rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 343 | ||||
-rw-r--r-- | llvm/test/CodeGen/Hexagon/autohvx/isel-hvx-pred-bitcast.ll | 17 | ||||
-rw-r--r-- | llvm/test/CodeGen/Hexagon/autohvx/isel-store-bitcast-v128i1.ll | 15 | ||||
-rw-r--r-- | llvm/test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll | 76 | ||||
-rw-r--r-- | llvm/test/CodeGen/Hexagon/store-vector-pred.ll | 76 |
7 files changed, 275 insertions, 306 deletions
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 2e7a65b..f396757 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1681,6 +1681,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STORE, VT, Custom); } + setOperationAction(ISD::STORE, MVT::v128i1, Custom); + for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16, MVT::v2i32}) { setCondCodeAction(ISD::SETNE, VT, Expand); @@ -1694,6 +1696,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // Custom-lower bitcasts from i8 to v8i1. setOperationAction(ISD::BITCAST, MVT::i8, Custom); + setOperationAction(ISD::BITCAST, MVT::i32, Custom); + setOperationAction(ISD::BITCAST, MVT::i64, Custom); setOperationAction(ISD::SETCC, MVT::v2i16, Custom); setOperationAction(ISD::VSELECT, MVT::v4i8, Custom); setOperationAction(ISD::VSELECT, MVT::v2i16, Custom); @@ -3077,12 +3081,6 @@ void HexagonTargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { - if (isHvxOperation(N)) { - LowerHvxOperationWrapper(N, Results, DAG); - if (!Results.empty()) - return; - } - // We are only custom-lowering stores to verify the alignment of the // address if it is a compile-time constant. Since a store can be modified // during type-legalization (the value being stored may need legalization), @@ -3096,12 +3094,6 @@ void HexagonTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { - if (isHvxOperation(N)) { - ReplaceHvxNodeResults(N, Results, DAG); - if (!Results.empty()) - return; - } - const SDLoc &dl(N); switch (N->getOpcode()) { case ISD::SRL: @@ -3386,25 +3378,12 @@ EVT HexagonTargetLowering::getOptimalMemOpType( return MVT::Other; } -bool HexagonTargetLowering::allowsMemoryAccess(LLVMContext &Context, - const DataLayout &DL, EVT VT, unsigned AddrSpace, unsigned Alignment, - MachineMemOperand::Flags Flags, bool *Fast) const { - MVT SVT = VT.getSimpleVT(); - if (Subtarget.isHVXVectorType(SVT, true)) - return allowsHvxMemoryAccess(SVT, Alignment, Flags, Fast); - return TargetLoweringBase::allowsMemoryAccess( - Context, DL, VT, AddrSpace, Alignment, Flags, Fast); -} - bool HexagonTargetLowering::allowsMisalignedMemoryAccesses( - EVT VT, unsigned AddrSpace, unsigned Alignment, - MachineMemOperand::Flags Flags, bool *Fast) const { - MVT SVT = VT.getSimpleVT(); - if (Subtarget.isHVXVectorType(SVT, true)) - return allowsHvxMisalignedMemoryAccesses(SVT, Alignment, Flags, Fast); + EVT VT, unsigned AS, unsigned Align, MachineMemOperand::Flags Flags, + bool *Fast) const { if (Fast) *Fast = false; - return false; + return Subtarget.isHVXVectorType(VT.getSimpleVT()); } std::pair<const TargetRegisterClass*, uint8_t> diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 4987130..7239c1d 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -305,12 +305,8 @@ namespace HexagonISD { EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override; - bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, - unsigned AddrSpace, unsigned Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const override; - bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) + unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) const override; /// Returns relocation base for the given PIC jumptable. @@ -408,11 +404,6 @@ namespace HexagonISD { VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const; SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const; - bool allowsHvxMemoryAccess(MVT VecTy, unsigned Alignment, - MachineMemOperand::Flags Flags, bool *Fast) const; - bool allowsHvxMisalignedMemoryAccesses(MVT VecTy, unsigned Align, - MachineMemOperand::Flags Flags, bool *Fast) const; - bool isHvxSingleTy(MVT Ty) const; bool isHvxPairTy(MVT Ty) const; bool isHvxBoolTy(MVT Ty) const; @@ -447,8 +438,6 @@ namespace HexagonISD { const SDLoc &dl, SelectionDAG &DAG) const; SDValue extendHvxVectorPred(SDValue VecV, const SDLoc &dl, MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const; - SDValue compressHvxPred(SDValue VecQ, const SDLoc &dl, MVT ResTy, - SelectionDAG &DAG) const; SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const; @@ -478,12 +467,8 @@ namespace HexagonISD { const override; bool isHvxOperation(SDValue Op) const; - bool isHvxOperation(SDNode *N) const; SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const; - void LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const; - void ReplaceHvxNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const; + SDValue PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; }; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 153087a..2b5257e 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -65,15 +65,6 @@ HexagonTargetLowering::initializeHVXLowering() { AddPromotedToType(Opc, FromTy, ToTy); }; - // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32). - // Note: v16i1 -> i16 is handled in type legalization instead of op - // legalization. - setOperationAction(ISD::BITCAST, MVT::i16, Custom); - setOperationAction(ISD::BITCAST, MVT::i32, Custom); - setOperationAction(ISD::BITCAST, MVT::i64, Custom); - setOperationAction(ISD::BITCAST, MVT::v16i1, Custom); - setOperationAction(ISD::BITCAST, MVT::v128i1, Custom); - setOperationAction(ISD::BITCAST, MVT::i128, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal); setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -299,25 +290,6 @@ HexagonTargetLowering::isHvxBoolTy(MVT Ty) const { Ty.getVectorElementType() == MVT::i1; } -bool -HexagonTargetLowering::allowsHvxMemoryAccess(MVT VecTy, unsigned Alignment, - MachineMemOperand::Flags Flags, bool *Fast) const { - // Bool vectors are excluded by default, but make it explicit to - // emphasize that bool vectors cannot be loaded or stored. - return Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false); -} - -bool -HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(MVT VecTy, - unsigned Align, MachineMemOperand::Flags Flags, bool *Fast) const { - if (!Subtarget.isHVXVectorType(VecTy)) - return false; - // XXX Should this be false? vmemu are a bit slower than vmem. - if (Fast) - *Fast = true; - return true; -} - SDValue HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy, SelectionDAG &DAG) const { @@ -1058,61 +1030,6 @@ HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl, } SDValue -HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl, - MVT ResTy, SelectionDAG &DAG) const { - // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1] - // (i.e. the entire predicate register) to bits [0..HwLen-1] of a - // vector register. The remaining bits of the vector register are - // unspecified. - - MachineFunction &MF = DAG.getMachineFunction(); - unsigned HwLen = Subtarget.getVectorLength(); - MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); - MVT PredTy = ty(VecQ); - unsigned PredLen = PredTy.getVectorNumElements(); - assert(HwLen % PredLen == 0); - MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen); - - Type *Int8Ty = Type::getInt8Ty(*DAG.getContext()); - SmallVector<Constant*, 128> Tmp; - // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,... - // These are bytes with the LSB rotated left with respect to their index. - for (unsigned i = 0; i != HwLen/8; ++i) { - for (unsigned j = 0; j != 8; ++j) - Tmp.push_back(ConstantInt::get(Int8Ty, 1u << j)); - } - Constant *CV = ConstantVector::get(Tmp); - unsigned Align = HwLen; - SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Align), DAG); - SDValue Bytes = DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP, - MachinePointerInfo::getConstantPool(MF), Align); - - // Select the bytes that correspond to true bits in the vector predicate. - SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes), - getZero(dl, VecTy, DAG)); - // Calculate the OR of all bytes in each group of 8. That will compress - // all the individual bits into a single byte. - // First, OR groups of 4, via vrmpy with 0x01010101. - SDValue All1 = - DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32)); - SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG); - // Then rotate the accumulated vector by 4 bytes, and do the final OR. - SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy, - {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG); - SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot}); - - // Pick every 8th byte and coalesce them at the beginning of the output. - // For symmetry, coalesce every 1+8th byte after that, then every 2+8th - // byte and so on. - SmallVector<int,128> Mask; - for (unsigned i = 0; i != HwLen; ++i) - Mask.push_back((8*i) % HwLen + i/(HwLen/8)); - SDValue Collect = - DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask); - return DAG.getBitcast(ResTy, Collect); -} - -SDValue HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); @@ -1520,58 +1437,192 @@ HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const { return T7; } +// This function does the computation needed to bitcast a vector of predicate +// register to a vector of integers. SDValue -HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const { - SDValue ValQ = Op.getOperand(0); - MVT ResTy = ty(Op); - MVT VecTy = ty(ValQ); +HexagonTargetLowering::HvxVecPredBitcastComputation(SDValue Op, + SelectionDAG &DAG) const { const SDLoc &dl(Op); + MVT VecTy; + int Length; + if (Subtarget.useHVX64BOps()) { + VecTy = MVT::getVectorVT(MVT::i32, 16); + Length = 2; + } + if (Subtarget.useHVX128BOps()) { + VecTy = MVT::getVectorVT(MVT::i32, 32); + Length = 4; + } + // r0 = ##0x08040201 // Pre-rotated bits per 4 consecutive bytes. + SDValue C8421 = DAG.getTargetConstant(0x08040201, dl, MVT::i32); + SDValue InstrC8421 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C8421, DAG); + // v0 = vand(q0,r0) + SDValue Vand = + getInstr(Hexagon::V6_vandqrt, dl, VecTy, {Op, InstrC8421}, DAG); + + // Or the bytes in each word into a single byte: that will form packs + // of 4 bits of the output. + // v1 = valign(v0,v0,#2) + SDValue C2 = DAG.getTargetConstant(2, dl, MVT::i32); + SDValue Valign = + getInstr(Hexagon::V6_valignbi, dl, VecTy, {Vand, Vand, C2}, DAG); + // v0 = vor(v0,v1) + SDValue Vor = getInstr(Hexagon::V6_vor, dl, VecTy, {Vand, Valign}, DAG); + // v1 = valign(v0,v0,#1) + SDValue C1 = DAG.getTargetConstant(1, dl, MVT::i32); + SDValue Valign1 = + getInstr(Hexagon::V6_valignbi, dl, VecTy, {Vor, Vor, C1}, DAG); + // v0 = vor(v0,v1) + SDValue Vor1 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vor, Valign1}, DAG); + + // Clear all the bytes per word except the lowest one. + // r0 = #0xff + SDValue Cff = DAG.getTargetConstant(0xff, dl, MVT::i32); + SDValue InstrCff = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, Cff, DAG); + // v1 = vsplat(r0) + SDValue Vsplat = getInstr(Hexagon::V6_lvsplatw, dl, VecTy, InstrCff, DAG); + // v0 = vand(v0,v1) + SDValue Vand1 = getInstr(Hexagon::V6_vand, dl, VecTy, {Vor1, Vsplat}, DAG); + + // Shift each word left by its index to position the 4-bit packs for oring. + // The words 0..8 and 16..31 need to be ored to form the 64-bit output. + // r0 = ##.Lshifts + // .Lshifts: + // .word 0 + // .word 4 + // .word 8 + // .word 12 + // .word 16 + // .word 20 + // .word 24 + // .word 28 + // .word 0 + // .word 4 + // .word 8 + // .word 12 + // .word 16 + // .word 20 + // .word 24 + // .word 28 + // v1 = vmem(r0+#0) + SmallVector<SDValue, 32> Elems; + for (int i = 0; i < Length; ++i) { + Elems.push_back(DAG.getConstant(0, dl, MVT::i32)); + Elems.push_back(DAG.getConstant(4, dl, MVT::i32)); + Elems.push_back(DAG.getConstant(8, dl, MVT::i32)); + Elems.push_back(DAG.getConstant(12, dl, MVT::i32)); + Elems.push_back(DAG.getConstant(16, dl, MVT::i32)); + Elems.push_back(DAG.getConstant(20, dl, MVT::i32)); + Elems.push_back(DAG.getConstant(24, dl, MVT::i32)); + Elems.push_back(DAG.getConstant(28, dl, MVT::i32)); + } - if (isHvxBoolTy(VecTy) && ResTy.isScalarInteger()) { - unsigned HwLen = Subtarget.getVectorLength(); - MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4); - SDValue VQ = compressHvxPred(ValQ, dl, WordTy, DAG); - unsigned BitWidth = ResTy.getSizeInBits(); - - if (BitWidth < 64) { - SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32), - dl, MVT::i32, DAG); - if (BitWidth == 32) - return W0; - assert(BitWidth < 32u); - return DAG.getZExtOrTrunc(W0, dl, ResTy); - } + SDValue BV = DAG.getBuildVector(VecTy, dl, Elems); + // v0.w = vasl(v0.w,v1.w) + SDValue Vasl = getInstr(Hexagon::V6_vaslwv, dl, VecTy, {Vand1, BV}, DAG); + + // 3 rounds of oring. + // r0 = #16 // HwLen/4 + SDValue C16 = DAG.getTargetConstant(16, dl, MVT::i32); + SDValue InstrC16 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C16, DAG); + // v1 = vror(v0,r0) + SDValue Vror = getInstr(Hexagon::V6_vror, dl, VecTy, {Vasl, InstrC16}, DAG); + // v0 = vor(v0,v1) + SDValue Vor2 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vasl, Vror}, DAG); + // r0 = #8 // HwLen/8 + SDValue C8 = DAG.getTargetConstant(8, dl, MVT::i32); + SDValue InstrC8 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C8, DAG); + // v1 = vror(v0,r0) + SDValue Vror1 = getInstr(Hexagon::V6_vror, dl, VecTy, {Vor2, InstrC8}, DAG); + // v0 = vor(v0,v1) + SDValue Vor3 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vor2, Vror1}, DAG); + // r0 = #4 // HwLen/16 + SDValue C4 = DAG.getTargetConstant(4, dl, MVT::i32); + SDValue InstrC4 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C4, DAG); + // v1 = vror(v0,r0) + SDValue Vror2 = getInstr(Hexagon::V6_vror, dl, VecTy, {Vor3, InstrC4}, DAG); + // v0 = vor(v0,v1) + SDValue Vor4 = getInstr(Hexagon::V6_vor, dl, VecTy, {Vor3, Vror2}, DAG); + return Vor4; +} - // The result is >= 64 bits. The only options are 64 or 128. - assert(BitWidth == 64 || BitWidth == 128); - SmallVector<SDValue,4> Words; - for (unsigned i = 0; i != BitWidth/32; ++i) { - SDValue W = extractHvxElementReg( - VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG); - Words.push_back(W); - } - SmallVector<SDValue,2> Combines; - assert(Words.size() % 2 == 0); - for (unsigned i = 0, e = Words.size(); i < e; i += 2) { - SDValue C = DAG.getNode( - HexagonISD::COMBINE, dl, MVT::i64, {Words[i], Words[i+1]}); - Combines.push_back(C); - } +SDValue HexagonTargetLowering::LowerHvxBitcast(SDValue Op, + SelectionDAG &DAG) const { + auto *N = Op.getNode(); + EVT VT = N->getValueType(0); + const SDLoc &dl(Op); + SDValue Q0 = N->getOperand(0); + EVT VTOp = Q0.getNode()->getValueType(0); + if (!(VT == MVT::i64 || VT == MVT::i32) || + !(VTOp == MVT::v64i1 || VTOp == MVT::v32i1)) + return Op; - if (BitWidth == 64) - return Combines[0]; - - // It must be i128. I128 is not a legal type, so this part will be - // executed during type legalization. We need to generate code that - // the default expansion can break up into smaller pieces. - SDValue C0 = DAG.getZExtOrTrunc(Combines[0], dl, ResTy); - SDValue C1 = DAG.getNode(ISD::SHL, dl, ResTy, - DAG.getZExtOrTrunc(Combines[1], dl, ResTy), - DAG.getConstant(64, dl, MVT::i32)); - return DAG.getNode(ISD::OR, dl, ResTy, C0, C1); + SDValue Vor4 = HvxVecPredBitcastComputation(Q0, DAG); + + // The output is v.w[8]:v.w[0] + // r3 = #0 + SDValue C0 = DAG.getTargetConstant(0, dl, MVT::i32); + SDValue InstrC0 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C0, DAG); + // r0 = vextract(v0,r3) + SDValue Res = + getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC0}, DAG); + if (VT == MVT::i64) { + // r3 = #32 + SDValue C32 = DAG.getTargetConstant(32, dl, MVT::i32); + SDValue InstrC32 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C32, DAG); + // r1 = vextract(v0,r3) + SDValue Vextract = + getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC32}, DAG); + Res = getInstr(Hexagon::A2_combinew, dl, MVT::i64, {Vextract, Res}, DAG); } + return Res; +} - return Op; +SDValue HexagonTargetLowering::LowerHvxStore(SDValue Op, + SelectionDAG &DAG) const { + auto *N = Op.getNode(); + const SDLoc &dl(Op); + SDValue Q0 = N->getOperand(1); + EVT VTOp = Q0.getNode()->getValueType(0); + if (Op.getOpcode() != ISD::STORE || VTOp != MVT::v128i1) + return Op; + SDValue Vor4 = HvxVecPredBitcastComputation(Q0, DAG); + // The output is v.w[8]:v.w[0] + // r3 = #0 + SDValue C0 = DAG.getTargetConstant(0, dl, MVT::i32); + SDValue InstrC0 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C0, DAG); + // r0 = vextract(v0,r3) + SDValue Vextract0 = + getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC0}, DAG); + // r3 = #32 + SDValue C32 = DAG.getTargetConstant(32, dl, MVT::i32); + SDValue InstrC32 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C32, DAG); + // r1 = vextract(v0,r3) + SDValue Vextract1 = + getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC32}, DAG); + SDValue Combine0 = + getInstr(Hexagon::A2_combinew, dl, MVT::i64, {Vextract1, Vextract0}, DAG); + // r3 = #64 + SDValue C64 = DAG.getTargetConstant(64, dl, MVT::i32); + SDValue InstrC64 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C64, DAG); + // r0 = vextract(v0,r3) + SDValue Vextract2 = + getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC64}, DAG); + // r3 = #96 + SDValue C96 = DAG.getTargetConstant(96, dl, MVT::i32); + SDValue InstrC96 = getInstr(Hexagon::A2_tfrsi, dl, MVT::i32, C96, DAG); + // r1 = vextract(v0,r3) + SDValue Vextract3 = + getInstr(Hexagon::V6_extractw, dl, MVT::i32, {Vor4, InstrC96}, DAG); + SDValue Combine1 = + getInstr(Hexagon::A2_combinew, dl, MVT::i64, {Vextract3, Vextract2}, DAG); + StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + SDValue C8 = DAG.getTargetConstant(8, dl, MVT::i32); + const SDValue Ops1[] = {ST->getBasePtr(), C8, Combine1, ST->getChain()}; + SDValue Store1 = getInstr(Hexagon::S2_storerd_io, dl, MVT::Other, Ops1, DAG); + const SDValue Ops0[] = {ST->getBasePtr(), C0, Combine0, Store1}; + SDValue Store0 = getInstr(Hexagon::S2_storerd_io, dl, MVT::Other, Ops0, DAG); + return Store0; } SDValue @@ -1747,6 +1798,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SETCC: case ISD::INTRINSIC_VOID: return Op; case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG); + case ISD::STORE: return LowerHvxStore(Op, DAG); // Unaligned loads will be handled by the default lowering. case ISD::LOAD: return SDValue(); } @@ -1756,28 +1808,6 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("Unhandled HVX operation"); } -void -HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N, - SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { -} - -void -HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, - SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { - unsigned Opc = N->getOpcode(); - switch (Opc) { - case ISD::BITCAST: - if (isHvxBoolTy(ty(N->getOperand(0)))) { - SDValue Op(N, 0); - SDValue C = LowerHvxBitcast(Op, DAG); - Results.push_back(C); - } - break; - default: - break; - } -} - SDValue HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { @@ -1810,16 +1840,3 @@ HexagonTargetLowering::isHvxOperation(SDValue Op) const { return Subtarget.isHVXVectorType(ty(V), true); }); } - -bool -HexagonTargetLowering::isHvxOperation(SDNode *N) const { - // If the type of any result, or any operand type are HVX vector types, - // this is an HVX operation. - auto IsHvxTy = [this] (EVT Ty) { - return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true); - }; - auto IsHvxOp = [this] (SDValue Op) { - return Subtarget.isHVXVectorType(ty(Op), true); - }; - return llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp); -} diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-hvx-pred-bitcast.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-hvx-pred-bitcast.ll index 6bf2cdaf..c072510 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/isel-hvx-pred-bitcast.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-hvx-pred-bitcast.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s ; CHECK-LABEL: danny: -; CHECK: vrmpy +; CHECK: vand define i64 @danny(<64 x i8> %a0, <64 x i8> %a1) #0 { %v0 = icmp eq <64 x i8> %a0, %a1 %v1 = bitcast <64 x i1> %v0 to i64 @@ -9,19 +9,18 @@ define i64 @danny(<64 x i8> %a0, <64 x i8> %a1) #0 { } ; CHECK-LABEL: sammy: -; CHECK: vrmpy +; CHECK: vand define i32 @sammy(<32 x i16> %a0, <32 x i16> %a1) #0 { %v0 = icmp eq <32 x i16> %a0, %a1 %v1 = bitcast <32 x i1> %v0 to i32 ret i32 %v1 } -; CHECK-LABEL: kirby: -; CHECK: vrmpy -define i16 @kirby(<16 x i32> %a0, <16 x i32> %a1) #0 { - %v0 = icmp eq <16 x i32> %a0, %a1 - %v1 = bitcast <16 x i1> %v0 to i16 - ret i16 %v1 -} +; This one still doesn't work. +; define i16 @kirby(<16 x i32> %a0, <16 x i32> %a1) #0 { +; %v0 = icmp eq <16 x i32> %a0, %a1 +; %v1 = bitcast <16 x i1> %v0 to i16 +; ret i16 %v1 +; } attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+v66,+hvx,+hvxv66,+hvx-length64b" } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-store-bitcast-v128i1.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-store-bitcast-v128i1.ll deleted file mode 100644 index d8d24a0..0000000 --- a/llvm/test/CodeGen/Hexagon/autohvx/isel-store-bitcast-v128i1.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: llc -march=hexagon < %s | FileCheck %s - -; Primarily check if this compiles without failing. - -; CHECK-LABEL: fred: -; CHECK: memd -define void @fred(<128 x i8> %a0, <128 x i8> %a1, i128* %a2) #0 { - %v0 = icmp eq <128 x i8> %a0, %a1 - %v1 = bitcast <128 x i1> %v0 to i128 - store i128 %v1, i128* %a2, align 16 - ret void -} - -attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+v66,+hvx,+hvxv66,+hvx-length128b" } - diff --git a/llvm/test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll b/llvm/test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll index 0834424..6aae0954 100644 --- a/llvm/test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll +++ b/llvm/test/CodeGen/Hexagon/hvx-bitcast-v64i1.ll @@ -1,47 +1,51 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s -; Test that LLVM does not assert and bitcast v64i1 to i64 is lowered -; without crashing. -; CHECK: valign +; Test that LLVM does not assert and bitcast v64i1 to i64 is lowered. + +; CHECK: v[[REG1:[0-9]+]] = valign(v{{[0-9]+}},v{{[0-9]+}},#2) +; CHECK: v[[REG2:[0-9]+]] = vor(v{{[0-9]+}},v[[REG1]]) +; CHECK: v[[REG3:[0-9]+]] = valign(v[[REG2]],v[[REG2]],#1) +; CHECK: v[[REG4:[0-9]+]] = vor(v{{[0-9]+}},v[[REG3]]) +; CHECK: v[[REG5:[0-9]+]] = vand(v[[REG4]],v{{[0-9]+}}) +; CHECK: v{{[0-9]+}}.w = vasl(v[[REG5]].w,v{{[0-9]+}}.w) target triple = "hexagon" -define dso_local void @f0() local_unnamed_addr #0 { -b0: - br i1 undef, label %b2, label %b1 - -b1: ; preds = %b0 - %v0 = load i8, i8* undef, align 1 - %v1 = zext i8 %v0 to i32 - %v2 = add nsw i32 %v1, -1 - %v3 = insertelement <64 x i32> undef, i32 %v2, i32 0 - %v4 = shufflevector <64 x i32> %v3, <64 x i32> undef, <64 x i32> zeroinitializer - %v5 = icmp ule <64 x i32> undef, %v4 - %v6 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* nonnull undef, i32 1, <64 x i1> %v5, <64 x i8> undef) - %v7 = lshr <64 x i8> %v6, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> - %v8 = and <64 x i8> %v7, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> - %v9 = zext <64 x i8> %v8 to <64 x i32> - %v10 = add nsw <64 x i32> undef, %v9 - %v11 = select <64 x i1> %v5, <64 x i32> %v10, <64 x i32> undef - %v12 = add <64 x i32> %v11, undef - %v13 = add <64 x i32> %v12, undef - %v14 = add <64 x i32> %v13, undef - %v15 = add <64 x i32> %v14, undef - %v16 = add <64 x i32> %v15, undef - %v17 = add <64 x i32> %v16, undef - %v18 = add <64 x i32> %v17, undef - %v19 = extractelement <64 x i32> %v18, i32 0 - %v20 = getelementptr inbounds i8, i8* null, i32 2160 - %v21 = bitcast i8* %v20 to i32* - store i32 %v19, i32* %v21, align 4 - br label %b2 - -b2: ; preds = %b1, %b0 +define dso_local void @fun() local_unnamed_addr #0 { +entry: + br i1 undef, label %cleanup, label %if.end + +if.end: + %0 = load i8, i8* undef, align 1 + %conv13.i = zext i8 %0 to i32 + %trip.count.minus.1216 = add nsw i32 %conv13.i, -1 + %broadcast.splatinsert221 = insertelement <64 x i32> undef, i32 %trip.count.minus.1216, i32 0 + %broadcast.splat222 = shufflevector <64 x i32> %broadcast.splatinsert221, <64 x i32> undef, <64 x i32> zeroinitializer + %1 = icmp ule <64 x i32> undef, %broadcast.splat222 + %wide.masked.load223 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* nonnull undef, i32 1, <64 x i1> %1, <64 x i8> undef) + %2 = lshr <64 x i8> %wide.masked.load223, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> + %3 = and <64 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %4 = zext <64 x i8> %3 to <64 x i32> + %5 = add nsw <64 x i32> undef, %4 + %6 = select <64 x i1> %1, <64 x i32> %5, <64 x i32> undef + %bin.rdx225 = add <64 x i32> %6, undef + %bin.rdx227 = add <64 x i32> %bin.rdx225, undef + %bin.rdx229 = add <64 x i32> %bin.rdx227, undef + %bin.rdx231 = add <64 x i32> %bin.rdx229, undef + %bin.rdx233 = add <64 x i32> %bin.rdx231, undef + %bin.rdx235 = add <64 x i32> %bin.rdx233, undef + %bin.rdx237 = add <64 x i32> %bin.rdx235, undef + %7 = extractelement <64 x i32> %bin.rdx237, i32 0 + %nChans = getelementptr inbounds i8, i8* null, i32 2160 + %8 = bitcast i8* %nChans to i32* + store i32 %7, i32* %8, align 4 + br label %cleanup + +cleanup: ret void } ; Function Attrs: argmemonly nounwind readonly willreturn -declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32 immarg, <64 x i1>, <64 x i8>) #1 +declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>) attributes #0 = { "target-features"="+hvx-length64b,+hvxv67,+v67,-long-calls" } -attributes #1 = { argmemonly nounwind readonly willreturn } diff --git a/llvm/test/CodeGen/Hexagon/store-vector-pred.ll b/llvm/test/CodeGen/Hexagon/store-vector-pred.ll index 407a30c..0c79497 100644 --- a/llvm/test/CodeGen/Hexagon/store-vector-pred.ll +++ b/llvm/test/CodeGen/Hexagon/store-vector-pred.ll @@ -1,47 +1,47 @@ -; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc < %s | FileCheck %s ; This test checks that store a vector predicate of type v128i1 is lowered -; without crashing. -; CHECK: valign +; and two double stores are generated. + +; CHECK-DAG: memd(r{{[0-9]+}}+#0) = r{{[0-9]+}}:{{[0-9]+}} +; CHECK-DAG: memd(r{{[0-9]+}}+#8) = r{{[0-9]+}}:{{[0-9]+}} target triple = "hexagon" -define dso_local void @f0() local_unnamed_addr #0 { -b0: - br i1 undef, label %b2, label %b1 - -b1: ; preds = %b0 - %v0 = load i8, i8* undef, align 1 - %v1 = zext i8 %v0 to i32 - %v2 = add nsw i32 %v1, -1 - %v3 = insertelement <128 x i32> undef, i32 %v2, i32 0 - %v4 = shufflevector <128 x i32> %v3, <128 x i32> undef, <128 x i32> zeroinitializer - %v5 = icmp ule <128 x i32> undef, %v4 - %v6 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* nonnull undef, i32 1, <128 x i1> %v5, <128 x i8> undef) - %v7 = lshr <128 x i8> %v6, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> - %v8 = and <128 x i8> %v7, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> - %v9 = zext <128 x i8> %v8 to <128 x i32> - %v10 = add nsw <128 x i32> undef, %v9 - %v11 = select <128 x i1> %v5, <128 x i32> %v10, <128 x i32> undef - %v12 = add <128 x i32> %v11, undef - %v13 = add <128 x i32> %v12, undef - %v14 = add <128 x i32> %v13, undef - %v15 = add <128 x i32> %v14, undef - %v16 = add <128 x i32> %v15, undef - %v17 = add <128 x i32> %v16, undef - %v18 = add <128 x i32> %v17, undef - %v19 = extractelement <128 x i32> %v18, i32 0 - %v20 = getelementptr inbounds i8, i8* null, i32 2160 - %v21 = bitcast i8* %v20 to i32* - store i32 %v19, i32* %v21, align 4 - br label %b2 - -b2: ; preds = %b1, %b0 +define dso_local void @raac_UnpackADIFHeader() local_unnamed_addr #0 { +entry: + br i1 undef, label %cleanup, label %if.end + +if.end: + %0 = load i8, i8* undef, align 1 + %conv13.i = zext i8 %0 to i32 + %trip.count.minus.1216 = add nsw i32 %conv13.i, -1 + %broadcast.splatinsert221 = insertelement <128 x i32> undef, i32 %trip.count.minus.1216, i32 0 + %broadcast.splat222 = shufflevector <128 x i32> %broadcast.splatinsert221, <128 x i32> undef, <128 x i32> zeroinitializer + %1 = icmp ule <128 x i32> undef, %broadcast.splat222 + %wide.masked.load223 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* nonnull undef, i32 1, <128 x i1> %1, <128 x i8> undef) + %2 = lshr <128 x i8> %wide.masked.load223, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> + %3 = and <128 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + %4 = zext <128 x i8> %3 to <128 x i32> + %5 = add nsw <128 x i32> undef, %4 + %6 = select <128 x i1> %1, <128 x i32> %5, <128 x i32> undef + %bin.rdx225 = add <128 x i32> %6, undef + %bin.rdx227 = add <128 x i32> %bin.rdx225, undef + %bin.rdx229 = add <128 x i32> %bin.rdx227, undef + %bin.rdx231 = add <128 x i32> %bin.rdx229, undef + %bin.rdx233 = add <128 x i32> %bin.rdx231, undef + %bin.rdx235 = add <128 x i32> %bin.rdx233, undef + %bin.rdx237 = add <128 x i32> %bin.rdx235, undef + %7 = extractelement <128 x i32> %bin.rdx237, i32 0 + %nChans = getelementptr inbounds i8, i8* null, i32 2160 + %8 = bitcast i8* %nChans to i32* + store i32 %7, i32* %8, align 4 + br label %cleanup + +cleanup: ret void -} + } -; Function Attrs: argmemonly nounwind readonly willreturn -declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32 immarg, <128 x i1>, <128 x i8>) #1 +declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32 immarg, <128 x i1>, <128 x i8>) attributes #0 = { "target-features"="+hvx-length128b,+hvxv67,+v67,-long-calls" } -attributes #1 = { argmemonly nounwind readonly willreturn } |