Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp             | 21
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 8
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td   | 18
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td      | 37
4 files changed, 72 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 2e4a2f8..a8145b6 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -296,6 +296,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name.startswith("avx.blend.p") ||
Name == "avx2.pblendw" ||
Name.startswith("avx2.pblendd.") ||
+ Name.startswith("avx.vbroadcastf128") ||
Name == "avx2.vbroadcasti128" ||
Name == "xop.vpcmov" ||
(Name.startswith("xop.vpcom") && F->arg_size() == 2))) {
@@ -886,7 +887,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
- } else if (IsX86 && Name.startswith("avx.vbroadcast")) {
+ } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
// Replace broadcasts with a series of insertelements.
Type *VecTy = CI->getType();
Type *EltTy = VecTy->getVectorElementType();
@@ -918,15 +919,21 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
bool DoSext = (StringRef::npos != Name.find("pmovsx"));
Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
: Builder.CreateZExt(SV, DstTy);
- } else if (IsX86 && Name == "avx2.vbroadcasti128") {
- // Replace vbroadcasts with a vector shuffle.
- Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
+ } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
+ Name == "avx2.vbroadcasti128")) {
+ // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
+ Type *EltTy = CI->getType()->getVectorElementType();
+ unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
+ Type *VT = VectorType::get(EltTy, NumSrcElts);
Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
PointerType::getUnqual(VT));
Value *Load = Builder.CreateLoad(VT, Op);
- uint32_t Idxs[4] = { 0, 1, 0, 1 };
- Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
- Idxs);
+ if (NumSrcElts == 2)
+ Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+ { 0, 1, 0, 1 });
+ else
+ Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+ { 0, 1, 2, 3, 0, 1, 2, 3 });
} else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
Name.startswith("avx2.vbroadcast") ||
Name.startswith("avx512.pbroadcast") ||
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 07a0543..7e5f0ad 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12804,6 +12804,10 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
// (insert_subvector (insert_subvector undef, (load addr), 0),
// (load addr + 16), Elts/2)
// --> load32 addr
+ // or a 16-byte broadcast:
+ // (insert_subvector (insert_subvector undef, (load addr), 0),
+ // (load addr), Elts/2)
+ // --> X86SubVBroadcast(load16 addr)
if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
@@ -12822,6 +12826,10 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
return Ld;
}
+
+ // If lower/upper loads are the same then lower to a VBROADCASTF128.
+ if (SubVec2 == peekThroughBitcasts(SubVec))
+ return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
}
}
}
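As an illustration of the DAG shape this combine targets (source snippet is not from the patch; assumes AVX and <immintrin.h>), inserting the same 128-bit load into both halves of a 256-bit value can now select a single vbroadcastf128 instead of a wider load or two inserts:

// Hypothetical example: both halves come from the same 16-byte load, i.e.
// (insert_subvector (insert_subvector undef, (load addr), 0), (load addr), 4),
// which the combine above rewrites to X86ISD::SUBV_BROADCAST of the load.
#include <immintrin.h>

__m256 splat_ps128(const float *p) {
  __m128 lo = _mm_loadu_ps(p);             // 16-byte load
  __m256 v  = _mm256_castps128_ps256(lo);  // lower half
  return _mm256_insertf128_ps(v, lo, 1);   // same value in the upper half
}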
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 8b66732..890a523 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -986,6 +986,10 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
AVX5128IBase, EVEX;
}
+//===----------------------------------------------------------------------===//
+// AVX-512 BROADCAST SUBVECTORS
+//
+
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
v16i32_info, v4i32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
@@ -1006,7 +1010,13 @@ defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
v8f32x_info, v4f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
+
+def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+ (VBROADCASTI32X4Z256rm addr:$src)>;
+def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+ (VBROADCASTI32X4Z256rm addr:$src)>;
}
+
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
v4i64x_info, v2i64x_info>, VEX_W,
@@ -1015,6 +1025,14 @@ defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
v4f64x_info, v2f64x_info>, VEX_W,
EVEX_V256, EVEX_CD8<64, CD8VT2>;
}
+
+let Predicates = [HasVLX, NoDQI] in {
+def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
+ (VBROADCASTF32X4Z256rm addr:$src)>;
+def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
+ (VBROADCASTI32X4Z256rm addr:$src)>;
+}
+
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
v8i64_info, v2i64x_info>, VEX_W,
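For illustration (not part of the patch), a source-level case the new HasVLX/NoDQI patterns cover: a 128-bit double load broadcast to both 256-bit halves, which on an AVX-512VL target without DQI can now select vbroadcastf32x4. Assumes <immintrin.h>:

// Hypothetical example for the v4f64 pattern: _mm256_broadcast_pd maps to the
// avx.vbroadcastf128.pd.256 intrinsic, which AutoUpgrade (above) now turns
// into a 128-bit load + shuffle, i.e. an X86SubVBroadcast of the load.
#include <immintrin.h>

__m256d splat_pd128(const __m128d *p) {
  return _mm256_broadcast_pd(p);  // can select vbroadcastf32x4 on VLX w/o DQI
}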
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index de7e753..9a515b7 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -7759,23 +7759,50 @@ let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in
def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
v4f64, v2f64, WriteFShuffle256>, VEX_L;
+//===----------------------------------------------------------------------===//
+// VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
+// halves of a 256-bit vector.
+//
let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in
def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
(ins i128mem:$src),
"vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
Sched<[WriteLoad]>, VEX, VEX_L;
+let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX] in
def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
(ins f128mem:$src),
- "vbroadcastf128\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (int_x86_avx_vbroadcastf128_pd_256 addr:$src))]>,
+ "vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
Sched<[WriteFShuffleLd]>, VEX, VEX_L;
-let Predicates = [HasAVX] in
-def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
+let Predicates = [HasAVX2, NoVLX] in {
+def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
+ (VBROADCASTI128 addr:$src)>;
+def : Pat<(v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src)))),
+ (VBROADCASTI128 addr:$src)>;
+def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+ (VBROADCASTI128 addr:$src)>;
+def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+ (VBROADCASTI128 addr:$src)>;
+}
+
+let Predicates = [HasAVX, NoVLX] in {
+def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
+ (VBROADCASTF128 addr:$src)>;
+def : Pat<(v8f32 (X86SubVBroadcast (loadv4f32 addr:$src))),
(VBROADCASTF128 addr:$src)>;
+}
+let Predicates = [HasAVX1Only] in {
+def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
+ (VBROADCASTF128 addr:$src)>;
+def : Pat<(v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src)))),
+ (VBROADCASTF128 addr:$src)>;
+def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+ (VBROADCASTF128 addr:$src)>;
+def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+ (VBROADCASTF128 addr:$src)>;
+}
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
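Similarly, an illustrative source case for the HasAVX1Only patterns added above (assumes <immintrin.h>): without AVX2 there is no vbroadcasti128, so a 128-bit integer subvector broadcast from memory is matched to vbroadcastf128 instead:

// Hypothetical example for the HasAVX1Only patterns: the same 128-bit integer
// load placed in both halves of a 256-bit value; with the patterns above this
// can select vbroadcastf128 rather than a separate load + vinsertf128.
#include <immintrin.h>

__m256i splat_si128(const void *p) {
  __m128i lo = _mm_loadu_si128((const __m128i *)p);
  __m256i v  = _mm256_castsi128_si256(lo);
  return _mm256_insertf128_si256(v, lo, 1);
}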