diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2014-12-04 09:40:44 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2014-12-04 09:40:44 +0000 |
commit | f1de34b84dea91b5060ee0fafbadaad5deaf199c (patch) | |
tree | a9f9f3b64f43e764b76d989db0be6bbcd1e681fa /llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | |
parent | 8b24b32c44d77ca3121240a13e0f40cb6d5a766c (diff) | |
download | llvm-f1de34b84dea91b5060ee0fafbadaad5deaf199c.zip llvm-f1de34b84dea91b5060ee0fafbadaad5deaf199c.tar.gz llvm-f1de34b84dea91b5060ee0fafbadaad5deaf199c.tar.bz2 |
Masked Load / Store Intrinsics - the CodeGen part.
I'm recommiting the codegen part of the patch.
The vectorizer part will be send to review again.
Masked Vector Load and Store Intrinsics.
Introduced new target-independent intrinsics in order to support masked vector loads and stores. The loop vectorizer optimizes loops containing conditional memory accesses by generating these intrinsics for existing targets AVX2 and AVX-512. The vectorizer asks the target about availability of masked vector loads and stores.
Added SDNodes for masked operations and lowering patterns for X86 code generator.
Examples:
<16 x i32> @llvm.masked.load.v16i32(i8* %addr, <16 x i32> %passthru, i32 4 /* align */, <16 x i1> %mask)
declare void @llvm.masked.store.v8f64(i8* %addr, <8 x double> %value, i32 4, <8 x i1> %mask)
Scalarizer for other targets (not AVX2/AVX-512) will be done in a separate patch.
http://reviews.llvm.org/D6191
llvm-svn: 223348
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 70 |
1 files changed, 70 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index eeecbf1..c3b197e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3617,6 +3617,70 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { DAG.setRoot(StoreNode); } +void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + Value *PtrOperand = I.getArgOperand(0); + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(I.getArgOperand(1)); + SDValue Mask = getValue(I.getArgOperand(3)); + EVT VT = Src0.getValueType(); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, MMO); + DAG.setRoot(StoreNode); + setValue(&I, StoreNode); +} + +void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + Value *PtrOperand = I.getArgOperand(0); + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(I.getArgOperand(1)); + SDValue Mask = getValue(I.getArgOperand(3)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SDValue InChain = DAG.getRoot(); + if (AA->pointsToConstantMemory( + AliasAnalysis::Location(PtrOperand, + AA->getTypeStoreSize(I.getType()), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + InChain = DAG.getEntryNode(); + } + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, MMO); + SDValue OutChain = Load.getValue(1); + DAG.setRoot(OutChain); + setValue(&I, Load); +} + void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); @@ -4918,6 +4982,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + case Intrinsic::masked_load: + visitMaskedLoad(I); + return nullptr; + case Intrinsic::masked_store: + visitMaskedStore(I); + return nullptr; case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case Intrinsic::x86_mmx_pslli_q: |