aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/WebAssembly
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/WebAssembly')
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 21
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h | 2
-rw-r--r-- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 68
3 files changed, 83 insertions, 8 deletions
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 6472334..47c24fc 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -317,6 +317,15 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
}
+ if (Subtarget->hasFP16()) {
+ setOperationAction(ISD::FMA, MVT::v8f16, Legal);
+ }
+
+ if (Subtarget->hasRelaxedSIMD()) {
+ setOperationAction(ISD::FMULADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMULADD, MVT::v2f64, Legal);
+ }
+
// Partial MLA reductions.
for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) {
setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
@@ -1120,6 +1129,18 @@ WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
+ const MachineFunction &MF, EVT VT) const {
+ // Fusing is reported as profitable only for vectors whose element type is
+ // f16, and only when the subtarget has the FP16 feature; everything else
+ // (scalars, other element types, non-simple element types) yields false.
+ if (!Subtarget->hasFP16() || !VT.isVector())
+ return false;
+
+ const EVT EltVT = VT.getScalarType();
+ return EltVT.isSimple() && EltVT.getSimpleVT().SimpleTy == MVT::f16;
+}
+
bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
SDValue Op, const TargetLoweringOpt &TLO) const {
// ISel process runs DAGCombiner after legalization; this step is called
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index b33a853..472ec67 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -81,6 +81,8 @@ private:
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ EVT VT) const override;
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 49af78b..0f6e1ca 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1213,6 +1213,27 @@ defm EXTMUL_LOW_U :
defm EXTMUL_HIGH_U :
SIMDExtBinary<I64x2, extmul_high_u, "extmul_high_i32x4_u", 0xdf>;
+// Select i32x4.dot_i16x8_s: add of the even and odd i32 lanes of extmul_{low,high}_s.
+def : Pat<
+ (v4i32 (add
+ (wasm_shuffle
+ (v4i32 (extmul_low_s v8i16:$lhs, v8i16:$rhs)),
+ (v4i32 (extmul_high_s v8i16:$lhs, v8i16:$rhs)),
+ (i32 0), (i32 1), (i32 2), (i32 3),
+ (i32 8), (i32 9), (i32 10), (i32 11),
+ (i32 16), (i32 17), (i32 18), (i32 19),
+ (i32 24), (i32 25), (i32 26), (i32 27)),
+ (wasm_shuffle
+ (v4i32 (extmul_low_s v8i16:$lhs, v8i16:$rhs)),
+ (v4i32 (extmul_high_s v8i16:$lhs, v8i16:$rhs)),
+ (i32 4), (i32 5), (i32 6), (i32 7),
+ (i32 12), (i32 13), (i32 14), (i32 15),
+ (i32 20), (i32 21), (i32 22), (i32 23),
+ (i32 28), (i32 29), (i32 30), (i32 31)))
+ ),
+ (v4i32 (DOT v8i16:$lhs, v8i16:$rhs))
+>;
+
//===----------------------------------------------------------------------===//
// Floating-point unary arithmetic
//===----------------------------------------------------------------------===//
@@ -1626,7 +1647,8 @@ defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero,
// Relaxed (Negative) Multiply-Add (madd/nmadd)
//===----------------------------------------------------------------------===//
-multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> reqs> {
+multiclass RELAXED_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
+ list<Predicate> reqs> {
defm MADD_#vec :
SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
[(set (vec.vt V128:$dst), (int_wasm_relaxed_madd
@@ -1640,16 +1662,46 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate>
vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c",
vec.prefix#".relaxed_nmadd", simdopS, reqs>;
- def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
- (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+ def : Pat<(fadd_contract (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b)), (vec.vt V128:$c)),
+ (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
+ def : Pat<(fmuladd (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)),
+ (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
- def : Pat<(fsub_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
- (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
+ def : Pat<(fsub_contract (vec.vt V128:$c), (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b))),
+ (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
+ def : Pat<(fmuladd (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)),
+ (!cast<Instruction>("NMADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<reqs>;
}
-defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
-defm "" : SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
-defm "" : SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>;
+defm "" : RELAXED_SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
+defm "" : RELAXED_SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>;
+
+//===----------------------------------------------------------------------===//
+// FP16 (Negative) Multiply-Add (madd/nmadd), selected from fma / fma(fneg a)
+//===----------------------------------------------------------------------===//
+
+multiclass HALF_PRECISION_SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS,
+ list<Predicate> reqs> {
+ defm MADD_#vec :
+ SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst), (fma
+ (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".madd\t$dst, $a, $b, $c",
+ vec.prefix#".madd", simdopA, reqs>;
+ defm NMADD_#vec :
+ SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst), (fma
+ (fneg (vec.vt V128:$a)), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".nmadd\t$dst, $a, $b, $c",
+ vec.prefix#".nmadd", simdopS, reqs>;
+}
+defm "" : HALF_PRECISION_SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>;
+
+// TODO: Introduce dedicated FP16 intrinsics instead of reusing the relaxed madd/nmadd ones.
+def : Pat<(v8f16 (int_wasm_relaxed_madd (v8f16 V128:$a), (v8f16 V128:$b), (v8f16 V128:$c))),
+ (MADD_F16x8 V128:$a, V128:$b, V128:$c)>, Requires<[HasFP16]>;
+def : Pat<(v8f16 (int_wasm_relaxed_nmadd (v8f16 V128:$a), (v8f16 V128:$b), (v8f16 V128:$c))),
+ (NMADD_F16x8 V128:$a, V128:$b, V128:$c)>, Requires<[HasFP16]>;
//===----------------------------------------------------------------------===//
// Laneselect