diff options
Diffstat (limited to 'llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td')
-rw-r--r-- | llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index d8948ad..1306026 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1505,6 +1505,51 @@ defm Q15MULR_SAT_S :
   SIMDBinary<I16x8, int_wasm_q15mulr_sat_signed, "q15mulr_sat_s", 0x82>;
 
 //===----------------------------------------------------------------------===//
+// Partial reductions (partial_reduce_{s,u}mla) via dot, extmul, extadd_pairwise
+//===----------------------------------------------------------------------===//
+// MLA: v8i16 -> v4i32 (signed: DOT; unsigned: EXTMUL_LOW_U + EXTMUL_HIGH_U)
+def : Pat<(v4i32 (partial_reduce_smla (v4i32 V128:$acc), (v8i16 V128:$lhs),
+                                      (v8i16 V128:$rhs))),
+          (ADD_I32x4 (DOT $lhs, $rhs), $acc)>;
+def : Pat<(v4i32 (partial_reduce_umla (v4i32 V128:$acc), (v8i16 V128:$lhs),
+                                      (v8i16 V128:$rhs))),
+          (ADD_I32x4 (ADD_I32x4 (EXTMUL_LOW_U_I32x4 $lhs, $rhs),
+                                (EXTMUL_HIGH_U_I32x4 $lhs, $rhs)),
+                     $acc)>;
+// MLA: v16i8 -> v4i32 (two I16x8-based partial sums, added before $acc)
+def : Pat<(v4i32 (partial_reduce_smla (v4i32 V128:$acc), (v16i8 V128:$lhs),
+                                      (v16i8 V128:$rhs))),
+          (ADD_I32x4 (ADD_I32x4 (DOT (extend_low_s_I16x8 $lhs),
+                                     (extend_low_s_I16x8 $rhs)),
+                                (DOT (extend_high_s_I16x8 $lhs),
+                                     (extend_high_s_I16x8 $rhs))),
+                     $acc)>;
+def : Pat<(v4i32 (partial_reduce_umla (v4i32 V128:$acc), (v16i8 V128:$lhs),
+                                      (v16i8 V128:$rhs))),
+          (ADD_I32x4 (ADD_I32x4 (extadd_pairwise_u_I32x4 (EXTMUL_LOW_U_I16x8 $lhs, $rhs)),
+                                (extadd_pairwise_u_I32x4 (EXTMUL_HIGH_U_I16x8 $lhs, $rhs))),
+                     $acc)>;
+
+// Accumulate: v8i16 -> v4i32 (rhs is splat(1): extadd_pairwise, no multiply)
+def : Pat<(v4i32 (partial_reduce_smla (v4i32 V128:$acc), (v8i16 V128:$in),
+                                      (I16x8.splat (i32 1)))),
+          (ADD_I32x4 (extadd_pairwise_s_I32x4 $in), $acc)>;
+
+def : Pat<(v4i32 (partial_reduce_umla (v4i32 V128:$acc), (v8i16 V128:$in),
+                                      (I16x8.splat (i32 1)))),
+          (ADD_I32x4 (extadd_pairwise_u_I32x4 $in), $acc)>;
+
+// Accumulate: v16i8 -> v4i32 (rhs is splat(1): two chained extadd_pairwise)
+def : Pat<(v4i32 (partial_reduce_smla (v4i32 V128:$acc), (v16i8 V128:$in),
+                                      (I8x16.splat (i32 1)))),
+          (ADD_I32x4 (extadd_pairwise_s_I32x4 (extadd_pairwise_s_I16x8 $in)),
+                     $acc)>;
+def : Pat<(v4i32 (partial_reduce_umla (v4i32 V128:$acc), (v16i8 V128:$in),
+                                      (I8x16.splat (i32 1)))),
+          (ADD_I32x4 (extadd_pairwise_u_I32x4 (extadd_pairwise_u_I16x8 $in)),
+                     $acc)>;
+
+//===----------------------------------------------------------------------===//
 // Relaxed swizzle
 //===----------------------------------------------------------------------===// |