diff options
author | Thomas Lively <tlively@google.com> | 2019-08-31 00:12:29 +0000 |
---|---|---|
committer | Thomas Lively <tlively@google.com> | 2019-08-31 00:12:29 +0000 |
commit | d0d931706146bef249a40b875fe4f09bd84e3c31 (patch) | |
tree | 42c1059d1ea1a23e9e3bcc474acefca719b1de40 | |
parent | 082754176f2ec785535f4bf65a804fdebbca21b2 (diff) | |
download | llvm-d0d931706146bef249a40b875fe4f09bd84e3c31.zip llvm-d0d931706146bef249a40b875fe4f09bd84e3c31.tar.gz llvm-d0d931706146bef249a40b875fe4f09bd84e3c31.tar.bz2 |
[WebAssembly] Add SIMD QFMA/QFMS
Summary:
Adds clang builtins and LLVM intrinsics for the experimental SIMD
QFMA/QFMS instructions. They are not implemented in engines yet, but that
is OK because users must explicitly opt in by calling the builtins.
Reviewers: aheejin, dschuff
Reviewed By: aheejin
Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D67020
llvm-svn: 370556
-rw-r--r-- | clang/include/clang/Basic/BuiltinsWebAssembly.def | 5 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 24 | ||||
-rw-r--r-- | clang/test/CodeGen/builtins-wasm.c | 28 | ||||
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 21 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll | 50 | ||||
-rw-r--r-- | llvm/test/MC/WebAssembly/simd-encodings.s | 12 |
7 files changed, 146 insertions, 2 deletions
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index acd713d..37f945a 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -108,6 +108,11 @@ TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "unimplemented-simd1 TARGET_BUILTIN(__builtin_wasm_sqrt_f32x4, "V4fV4f", "nc", "unimplemented-simd128") TARGET_BUILTIN(__builtin_wasm_sqrt_f64x2, "V2dV2d", "nc", "unimplemented-simd128") +TARGET_BUILTIN(__builtin_wasm_qfma_f32x4, "V4fV4fV4fV4f", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_qfms_f32x4, "V4fV4fV4fV4f", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_qfma_f64x2, "V2dV2dV2dV2d", "nc", "unimplemented-simd128") +TARGET_BUILTIN(__builtin_wasm_qfms_f64x2, "V2dV2dV2dV2d", "nc", "unimplemented-simd128") + TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i64x2_f64x2, "V2LLiV2d", "nc", "unimplemented-simd128") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e9e9fe2..1b2468d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -14173,7 +14173,29 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } - + case WebAssembly::BI__builtin_wasm_qfma_f32x4: + case WebAssembly::BI__builtin_wasm_qfms_f32x4: + case WebAssembly::BI__builtin_wasm_qfma_f64x2: + case WebAssembly::BI__builtin_wasm_qfms_f64x2: { + Value *A = EmitScalarExpr(E->getArg(0)); + Value *B = EmitScalarExpr(E->getArg(1)); + Value *C = EmitScalarExpr(E->getArg(2)); + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_qfma_f32x4: + case WebAssembly::BI__builtin_wasm_qfma_f64x2: 
+ IntNo = Intrinsic::wasm_qfma; + break; + case WebAssembly::BI__builtin_wasm_qfms_f32x4: + case WebAssembly::BI__builtin_wasm_qfms_f64x2: + IntNo = Intrinsic::wasm_qfms; + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + Function *Callee = CGM.getIntrinsic(IntNo, A->getType()); + return Builder.CreateCall(Callee, {A, B, C}); + } default: return nullptr; } diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index f3129b7..43299bd 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -412,6 +412,34 @@ f64x2 sqrt_f64x2(f64x2 x) { // WEBASSEMBLY: ret } +f32x4 qfma_f32x4(f32x4 a, f32x4 b, f32x4 c) { + return __builtin_wasm_qfma_f32x4(a, b, c); + // WEBASSEMBLY: call <4 x float> @llvm.wasm.qfma.v4f32( + // WEBASSEMBLY-SAME: <4 x float> %a, <4 x float> %b, <4 x float> %c) + // WEBASSEMBLY-NEXT: ret +} + +f32x4 qfms_f32x4(f32x4 a, f32x4 b, f32x4 c) { + return __builtin_wasm_qfms_f32x4(a, b, c); + // WEBASSEMBLY: call <4 x float> @llvm.wasm.qfms.v4f32( + // WEBASSEMBLY-SAME: <4 x float> %a, <4 x float> %b, <4 x float> %c) + // WEBASSEMBLY-NEXT: ret +} + +f64x2 qfma_f64x2(f64x2 a, f64x2 b, f64x2 c) { + return __builtin_wasm_qfma_f64x2(a, b, c); + // WEBASSEMBLY: call <2 x double> @llvm.wasm.qfma.v2f64( + // WEBASSEMBLY-SAME: <2 x double> %a, <2 x double> %b, <2 x double> %c) + // WEBASSEMBLY-NEXT: ret +} + +f64x2 qfms_f64x2(f64x2 a, f64x2 b, f64x2 c) { + return __builtin_wasm_qfms_f64x2(a, b, c); + // WEBASSEMBLY: call <2 x double> @llvm.wasm.qfms.v2f64( + // WEBASSEMBLY-SAME: <2 x double> %a, <2 x double> %b, <2 x double> %c) + // WEBASSEMBLY-NEXT: ret +} + i32x4 trunc_saturate_s_i32x4_f32x4(f32x4 f) { return __builtin_wasm_trunc_saturate_s_i32x4_f32x4(f); // WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.saturate.signed.v4i32.v4f32(<4 x float> %f) diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index 4750c31..73d190b 100644 
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -109,6 +109,14 @@ def int_wasm_alltrue : Intrinsic<[llvm_i32_ty], [llvm_anyvector_ty], [IntrNoMem, IntrSpeculatable]>; +def int_wasm_qfma : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_qfms : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; //===----------------------------------------------------------------------===// // Bulk memory intrinsics diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index dd8930f..ff03167 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -732,3 +732,24 @@ foreach t2 = !foldl( ) ) in def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>; + +//===----------------------------------------------------------------------===// +// Quasi-Fused Multiply- Add and Subtract (QFMA/QFMS) +//===----------------------------------------------------------------------===// +multiclass SIMDQFM<ValueType vec_t, string vec, bits<32> baseInst> { + defm QFMA_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), + (outs), (ins), + [(set (vec_t V128:$dst), + (int_wasm_qfma (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))], + vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", baseInst>; + defm QFMS_#vec_t : + SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), + (outs), (ins), + [(set (vec_t V128:$dst), + (int_wasm_qfms (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))], + vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", !add(baseInst, 1)>; +} + +defm "" : SIMDQFM<v4f32, "f32x4", 0x98>; +defm "" : SIMDQFM<v2f64, "f64x2", 0xa3>; diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll 
b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll index 53c98d2..2077cf8 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -290,11 +290,35 @@ define <2 x i64> @trunc_sat_u_v2i64(<2 x double> %x) { declare <4 x float> @llvm.wasm.bitselect.v4f32(<4 x float>, <4 x float>, <4 x float>) define <4 x float> @bitselect_v4f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %c) { %a = call <4 x float> @llvm.wasm.bitselect.v4f32( - <4 x float> %v1, <4 x float> %v2, <4 x float> %c + <4 x float> %v1, <4 x float> %v2, <4 x float> %c ) ret <4 x float> %a } +; CHECK-LABEL: qfma_v4f32: +; SIMD128-NEXT: .functype qfma_v4f32 (v128, v128, v128) -> (v128){{$}} +; SIMD128-NEXT: f32x4.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <4 x float> @llvm.wasm.qfma.v4f32(<4 x float>, <4 x float>, <4 x float>) +define <4 x float> @qfma_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { + %v = call <4 x float> @llvm.wasm.qfma.v4f32( + <4 x float> %a, <4 x float> %b, <4 x float> %c + ) + ret <4 x float> %v +} + +; CHECK-LABEL: qfms_v4f32: +; SIMD128-NEXT: .functype qfms_v4f32 (v128, v128, v128) -> (v128){{$}} +; SIMD128-NEXT: f32x4.qfms $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <4 x float> @llvm.wasm.qfms.v4f32(<4 x float>, <4 x float>, <4 x float>) +define <4 x float> @qfms_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { + %v = call <4 x float> @llvm.wasm.qfms.v4f32( + <4 x float> %a, <4 x float> %b, <4 x float> %c + ) + ret <4 x float> %v +} + ; ============================================================================== ; 2 x f64 ; ============================================================================== @@ -309,3 +333,27 @@ define <2 x double> @bitselect_v2f64(<2 x double> %v1, <2 x double> %v2, <2 x do ) ret <2 x double> %a } + +; CHECK-LABEL: qfma_v2f64: +; SIMD128-NEXT: .functype qfma_v2f64 (v128, v128, v128) -> 
(v128){{$}} +; SIMD128-NEXT: f64x2.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <2 x double> @llvm.wasm.qfma.v2f64(<2 x double>, <2 x double>, <2 x double>) +define <2 x double> @qfma_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { + %v = call <2 x double> @llvm.wasm.qfma.v2f64( + <2 x double> %a, <2 x double> %b, <2 x double> %c + ) + ret <2 x double> %v +} + +; CHECK-LABEL: qfms_v2f64: +; SIMD128-NEXT: .functype qfms_v2f64 (v128, v128, v128) -> (v128){{$}} +; SIMD128-NEXT: f64x2.qfms $push[[R:[0-9]+]]=, $0, $1, $2{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <2 x double> @llvm.wasm.qfms.v2f64(<2 x double>, <2 x double>, <2 x double>) +define <2 x double> @qfms_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { + %v = call <2 x double> @llvm.wasm.qfms.v2f64( + <2 x double> %a, <2 x double> %b, <2 x double> %c + ) + ret <2 x double> %v +} diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index b2a39e7..491b484 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -382,6 +382,12 @@ main: # CHECK: f32x4.sqrt # encoding: [0xfd,0x97,0x01] f32x4.sqrt + # CHECK: f32x4.qfma # encoding: [0xfd,0x98,0x01] + f32x4.qfma + + # CHECK: f32x4.qfms # encoding: [0xfd,0x99,0x01] + f32x4.qfms + # CHECK: f32x4.add # encoding: [0xfd,0x9a,0x01] f32x4.add @@ -409,6 +415,12 @@ main: # CHECK: f64x2.sqrt # encoding: [0xfd,0xa2,0x01] f64x2.sqrt + # CHECK: f64x2.qfma # encoding: [0xfd,0xa3,0x01] + f64x2.qfma + + # CHECK: f64x2.qfms # encoding: [0xfd,0xa4,0x01] + f64x2.qfms + # CHECK: f64x2.add # encoding: [0xfd,0xa5,0x01] f64x2.add |