diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 21 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 18 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 37 |
4 files changed, 72 insertions, 12 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 2e4a2f8..a8145b6 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -296,6 +296,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name.startswith("avx.blend.p") || Name == "avx2.pblendw" || Name.startswith("avx2.pblendd.") || + Name.startswith("avx.vbroadcastf128") || Name == "avx2.vbroadcasti128" || Name == "xop.vpcmov" || (Name.startswith("xop.vpcom") && F->arg_size() == 2))) { @@ -886,7 +887,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); Rep = Builder.CreateZExt(Rep, CI->getType(), ""); - } else if (IsX86 && Name.startswith("avx.vbroadcast")) { + } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) { // Replace broadcasts with a series of insertelements. Type *VecTy = CI->getType(); Type *EltTy = VecTy->getVectorElementType(); @@ -918,15 +919,21 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { bool DoSext = (StringRef::npos != Name.find("pmovsx")); Rep = DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy); - } else if (IsX86 && Name == "avx2.vbroadcasti128") { - // Replace vbroadcasts with a vector shuffle. - Type *VT = VectorType::get(Type::getInt64Ty(C), 2); + } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") || + Name == "avx2.vbroadcasti128")) { + // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle. + Type *EltTy = CI->getType()->getVectorElementType(); + unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits(); + Type *VT = VectorType::get(EltTy, NumSrcElts); Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0), PointerType::getUnqual(VT)); Value *Load = Builder.CreateLoad(VT, Op); - uint32_t Idxs[4] = { 0, 1, 0, 1 }; - Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), - Idxs); + if (NumSrcElts == 2) + Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), + { 0, 1, 0, 1 }); + else + Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), + { 0, 1, 2, 3, 0, 1, 2, 3 }); } else if (IsX86 && (Name.startswith("avx2.pbroadcast") || Name.startswith("avx2.vbroadcast") || Name.startswith("avx512.pbroadcast") || diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 07a0543..7e5f0ad 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -12804,6 +12804,10 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, // (insert_subvector (insert_subvector undef, (load addr), 0), // (load addr + 16), Elts/2) // --> load32 addr + // or a 16-byte broadcast: + // (insert_subvector (insert_subvector undef, (load addr), 0), + // (load addr), Elts/2) + // --> X86SubVBroadcast(load16 addr) if ((IdxVal == OpVT.getVectorNumElements() / 2) && Vec.getOpcode() == ISD::INSERT_SUBVECTOR && OpVT.is256BitVector() && SubVecVT.is128BitVector()) { @@ -12822,6 +12826,10 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false)) return Ld; } + + // If lower/upper loads are the same then lower to a VBROADCASTF128. + if (SubVec2 == peekThroughBitcasts(SubVec)) + return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec); } } } diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 8b66732..890a523 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -986,6 +986,10 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr, AVX5128IBase, EVEX; } +//===----------------------------------------------------------------------===// +// AVX-512 BROADCAST SUBVECTORS +// + defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", v16i32_info, v4i32x_info>, EVEX_V512, EVEX_CD8<32, CD8VT4>; @@ -1006,7 +1010,13 @@ defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4", v8f32x_info, v4f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VT4>; + +def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))), + (VBROADCASTI32X4Z256rm addr:$src)>; +def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))), + (VBROADCASTI32X4Z256rm addr:$src)>; } + let Predicates = [HasVLX, HasDQI] in { defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2", v4i64x_info, v2i64x_info>, VEX_W, @@ -1015,6 +1025,14 @@ defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2", v4f64x_info, v2f64x_info>, VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>; } + +let Predicates = [HasVLX, NoDQI] in { +def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))), + (VBROADCASTF32X4Z256rm addr:$src)>; +def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))), + (VBROADCASTI32X4Z256rm addr:$src)>; +} + let Predicates = [HasDQI] in { defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2", v8i64_info, v2i64x_info>, VEX_W, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index de7e753..9a515b7 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7759,23 +7759,50 @@ let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256, v4f64, v2f64, WriteFShuffle256>, VEX_L; +//===----------------------------------------------------------------------===// +// VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both +// halves of a 256-bit vector. +// let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), "vbroadcasti128\t{$src, $dst|$dst, $src}", []>, Sched<[WriteLoad]>, VEX, VEX_L; +let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX] in def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), - "vbroadcastf128\t{$src, $dst|$dst, $src}", - [(set VR256:$dst, - (int_x86_avx_vbroadcastf128_pd_256 addr:$src))]>, + "vbroadcastf128\t{$src, $dst|$dst, $src}", []>, Sched<[WriteFShuffleLd]>, VEX, VEX_L; -let Predicates = [HasAVX] in -def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), +let Predicates = [HasAVX2, NoVLX] in { +def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))), + (VBROADCASTI128 addr:$src)>; +def : Pat<(v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src)))), + (VBROADCASTI128 addr:$src)>; +def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))), + (VBROADCASTI128 addr:$src)>; +def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))), + (VBROADCASTI128 addr:$src)>; +} + +let Predicates = [HasAVX, NoVLX] in { +def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))), + (VBROADCASTF128 addr:$src)>; +def : Pat<(v8f32 (X86SubVBroadcast (loadv4f32 addr:$src))), (VBROADCASTF128 addr:$src)>; +} +let Predicates = [HasAVX1Only] in { +def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))), + (VBROADCASTF128 addr:$src)>; +def : Pat<(v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src)))), + (VBROADCASTF128 addr:$src)>; +def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))), + (VBROADCASTF128 addr:$src)>; +def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))), + (VBROADCASTF128 addr:$src)>; +} //===----------------------------------------------------------------------===// // VINSERTF128 - Insert packed floating-point values |