diff options
Diffstat (limited to 'llvm/test/CodeGen/WebAssembly')
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/libcall_vectorized.ll | 33 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll | 129 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/ref-test-func.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/returned.ll | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/simd-arith.ll | 136 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll | 145 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/target-features-cpus.ll | 7 | ||||
-rw-r--r-- | llvm/test/CodeGen/WebAssembly/vector-reduce.ll | 56 |
12 files changed, 375 insertions, 177 deletions
diff --git a/llvm/test/CodeGen/WebAssembly/libcall_vectorized.ll b/llvm/test/CodeGen/WebAssembly/libcall_vectorized.ll new file mode 100644 index 0000000..2d1056f --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/libcall_vectorized.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 + +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers -mattr=+simd128 | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +declare <4 x float> @llvm.exp10.v4f32(<4 x float>) + +define <4 x float> @exp10_f32v4(<4 x float> %v) { +; CHECK-LABEL: exp10_f32v4: +; CHECK: .functype exp10_f32v4 (v128) -> (v128) +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get $push12=, 0 +; CHECK-NEXT: f32x4.extract_lane $push0=, $pop12, 0 +; CHECK-NEXT: call $push1=, exp10f, $pop0 +; CHECK-NEXT: f32x4.splat $push2=, $pop1 +; CHECK-NEXT: local.get $push13=, 0 +; CHECK-NEXT: f32x4.extract_lane $push3=, $pop13, 1 +; CHECK-NEXT: call $push4=, exp10f, $pop3 +; CHECK-NEXT: f32x4.replace_lane $push5=, $pop2, 1, $pop4 +; CHECK-NEXT: local.get $push14=, 0 +; CHECK-NEXT: f32x4.extract_lane $push6=, $pop14, 2 +; CHECK-NEXT: call $push7=, exp10f, $pop6 +; CHECK-NEXT: f32x4.replace_lane $push8=, $pop5, 2, $pop7 +; CHECK-NEXT: local.get $push15=, 0 +; CHECK-NEXT: f32x4.extract_lane $push9=, $pop15, 3 +; CHECK-NEXT: call $push10=, exp10f, $pop9 +; CHECK-NEXT: f32x4.replace_lane $push11=, $pop8, 3, $pop10 +; CHECK-NEXT: return $pop11 +entry: + %r = call <4 x float> @llvm.exp10.v4f32(<4 x float> %v) + ret <4 x float> %r +} diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll new file mode 100644 index 0000000..0f968de --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll @@ -0,0 +1,129 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -wasm-lower-em-ehsjlj -wasm-enable-sjlj -mtriple=wasm32-unknown-emscripten < %s | FileCheck %s + +@buf = external global i8 +declare i32 @setjmp(ptr) returns_twice +declare void @dummy() + +define void @test_static() { +; CHECK-LABEL: define void @test_static() personality ptr @__gxx_wasm_personality_v0 { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[X:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[FUNCTIONINVOCATIONID:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label %[[SETJMP_DISPATCH:.*]] +; CHECK: [[SETJMP_DISPATCH]]: +; CHECK-NEXT: [[VAL1:%.*]] = phi i32 [ [[VAL:%.*]], %[[IF_END:.*]] ], [ undef, %[[ENTRY]] ] +; CHECK-NEXT: [[LABEL_PHI:%.*]] = phi i32 [ [[LABEL:%.*]], %[[IF_END]] ], [ -1, %[[ENTRY]] ] +; CHECK-NEXT: switch i32 [[LABEL_PHI]], label %[[ENTRY_SPLIT:.*]] [ +; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_SPLIT:.*]] +; CHECK-NEXT: ] +; CHECK: [[ENTRY_SPLIT]]: +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[X]]) +; CHECK-NEXT: call void @__wasm_setjmp(ptr @buf, i32 1, ptr [[FUNCTIONINVOCATIONID]]) +; CHECK-NEXT: br label %[[ENTRY_SPLIT_SPLIT]] +; CHECK: [[ENTRY_SPLIT_SPLIT]]: +; CHECK-NEXT: [[SETJMP_RET:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[VAL1]], %[[SETJMP_DISPATCH]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SETJMP_RET]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: invoke void @dummy() +; CHECK-NEXT: to [[DOTNOEXC:label %.*]] unwind label %[[CATCH_DISPATCH_LONGJMP:.*]] +; CHECK: [[_NOEXC:.*:]] +; CHECK-NEXT: ret void +; CHECK: [[ELSE]]: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[X]]) +; CHECK-NEXT: ret void +; CHECK: [[CATCH_DISPATCH_LONGJMP]]: +; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch.longjmp] unwind to caller +; CHECK: [[CATCH_LONGJMP:.*:]] +; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [] +; CHECK-NEXT: [[THROWN:%.*]] = call ptr @llvm.wasm.catch(i32 1) +; CHECK-NEXT: [[ENV_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 0 +; CHECK-NEXT: [[VAL_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 1 +; CHECK-NEXT: [[ENV:%.*]] = load ptr, ptr [[ENV_GEP]], align 4 +; CHECK-NEXT: [[VAL]] = load i32, ptr [[VAL_GEP]], align 4 +; CHECK-NEXT: [[LABEL]] = call i32 @__wasm_setjmp_test(ptr [[ENV]], ptr [[FUNCTIONINVOCATIONID]]) [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LABEL]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[IF_THEN:.*]], label %[[IF_END]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: call void @__wasm_longjmp(ptr [[ENV]], i32 [[VAL]]) [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: unreachable +; CHECK: [[IF_END]]: +; CHECK-NEXT: catchret from [[TMP1]] to label %[[SETJMP_DISPATCH]] +; +entry: + %x = alloca i32, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr %x) + %call = call i32 @setjmp(ptr @buf) returns_twice + %cmp = icmp eq i32 %call, 0 + br i1 %cmp, label %if, label %else + +if: + call void @dummy() + ret void + +else: + call void @llvm.lifetime.end.p0(i64 4, ptr %x) + ret void +} + +define void @test_dynamic(i32 %size) { +; CHECK-LABEL: define void @test_dynamic( +; CHECK-SAME: i32 [[SIZE:%.*]]) personality ptr @__gxx_wasm_personality_v0 { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[FUNCTIONINVOCATIONID:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label %[[SETJMP_DISPATCH:.*]] +; CHECK: [[SETJMP_DISPATCH]]: +; CHECK-NEXT: [[VAL1:%.*]] = phi i32 [ [[VAL:%.*]], %[[IF_END:.*]] ], [ undef, %[[ENTRY]] ] +; CHECK-NEXT: [[LABEL_PHI:%.*]] = phi i32 [ [[LABEL:%.*]], %[[IF_END]] ], [ -1, %[[ENTRY]] ] +; CHECK-NEXT: switch i32 [[LABEL_PHI]], label %[[ENTRY_SPLIT:.*]] [ +; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_SPLIT:.*]] +; CHECK-NEXT: ] +; CHECK: [[ENTRY_SPLIT]]: +; CHECK-NEXT: [[X:%.*]] = alloca i32, i32 [[SIZE]], align 4 +; CHECK-NEXT: call void @__wasm_setjmp(ptr @buf, i32 1, ptr [[FUNCTIONINVOCATIONID]]) +; CHECK-NEXT: br label %[[ENTRY_SPLIT_SPLIT]] +; CHECK: [[ENTRY_SPLIT_SPLIT]]: +; CHECK-NEXT: [[SETJMP_RET:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[VAL1]], %[[SETJMP_DISPATCH]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SETJMP_RET]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: invoke void @dummy() +; CHECK-NEXT: to [[DOTNOEXC:label %.*]] unwind label %[[CATCH_DISPATCH_LONGJMP:.*]] +; CHECK: [[_NOEXC:.*:]] +; CHECK-NEXT: ret void +; CHECK: [[ELSE]]: +; CHECK-NEXT: ret void +; CHECK: [[CATCH_DISPATCH_LONGJMP]]: +; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch.longjmp] unwind to caller +; CHECK: [[CATCH_LONGJMP:.*:]] +; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [] +; CHECK-NEXT: [[THROWN:%.*]] = call ptr @llvm.wasm.catch(i32 1) +; CHECK-NEXT: [[ENV_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 0 +; CHECK-NEXT: [[VAL_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 1 +; CHECK-NEXT: [[ENV:%.*]] = load ptr, ptr [[ENV_GEP]], align 4 +; CHECK-NEXT: [[VAL]] = load i32, ptr [[VAL_GEP]], align 4 +; CHECK-NEXT: [[LABEL]] = call i32 @__wasm_setjmp_test(ptr [[ENV]], ptr [[FUNCTIONINVOCATIONID]]) [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LABEL]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[IF_THEN:.*]], label %[[IF_END]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: call void @__wasm_longjmp(ptr [[ENV]], i32 [[VAL]]) [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: unreachable +; CHECK: [[IF_END]]: +; CHECK-NEXT: catchret from [[TMP1]] to label %[[SETJMP_DISPATCH]] +; +entry: + %x = alloca i32, i32 %size, align 4 + call void @llvm.lifetime.start.p0(i64 -1, ptr %x) + %call = call i32 @setjmp(ptr @buf) returns_twice + %cmp = icmp eq i32 %call, 0 + br i1 %cmp, label %if, label %else + +if: + call void @dummy() + ret void + +else: + call void @llvm.lifetime.end.p0(i64 -1, ptr %x) + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll index fec9836..bab8403 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll @@ -16,10 +16,10 @@ entry: call void @foo(), !dbg !7 ret void, !dbg !8 ; CHECK: entry: - ; CHECK-NEXT: %functionInvocationId = alloca i32, align 4, !dbg ![[DL0:.*]] + ; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16, !dbg ![[DL0:.*]] + ; CHECK-NEXT: %functionInvocationId = alloca i32, align 4, !dbg ![[DL0]] ; CHECK: entry.split: - ; CHECK: alloca {{.*}}, !dbg ![[DL0]] ; CHECK: call void @__wasm_setjmp{{.*}}, !dbg ![[DL1:.*]] ; CHECK-NEXT: br {{.*}}, !dbg ![[DL2:.*]] diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll index b584342..51dcf2f 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll @@ -22,17 +22,17 @@ entry: call void @longjmp(ptr %buf, i32 1) #1 unreachable ; CHECK: entry: +; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16 ; CHECK-NEXT: %functionInvocationId = alloca i32, align 4 ; CHECK-NEXT: br label %entry.split ; CHECK: entry.split -; CHECK-NEXT: %[[BUF:.*]] = alloca [1 x %struct.__jmp_buf_tag] -; CHECK-NEXT: call void @__wasm_setjmp(ptr %[[BUF]], i32 1, ptr %functionInvocationId) +; CHECK-NEXT: call void @__wasm_setjmp(ptr %buf, i32 1, ptr %functionInvocationId) ; CHECK-NEXT: br label %entry.split.split ; CHECK: entry.split.split: ; CHECK-NEXT: phi i32 [ 0, %entry.split ], [ %[[LONGJMP_RESULT:.*]], %if.end ] -; CHECK-NEXT: %[[JMPBUF:.*]] = ptrtoint ptr %[[BUF]] to [[PTR]] +; CHECK-NEXT: %[[JMPBUF:.*]] = ptrtoint ptr %buf to [[PTR]] ; CHECK-NEXT: store [[PTR]] 0, ptr @__THREW__ ; CHECK-NEXT: call cc{{.*}} void @__invoke_void_[[PTR]]_i32(ptr @emscripten_longjmp, [[PTR]] %[[JMPBUF]], i32 1) ; CHECK-NEXT: %[[__THREW__VAL:.*]] = load [[PTR]], ptr @__THREW__ diff --git a/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll index b4c93c4..9de6652 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll @@ -108,7 +108,7 @@ catch: ; preds = %catch.start call void @__cxa_end_catch() [ "funclet"(token %2) ] catchret from %2 to label %catchret.dest ; CHECK: catch: ; preds = %catch.start -; CHECK-NEXT: %exn = load ptr, ptr %exn.slot6, align 4 +; CHECK-NEXT: %exn = load ptr, ptr %exn.slot, align 4 ; CHECK-NEXT: %5 = call ptr @__cxa_begin_catch(ptr %exn) #3 [ "funclet"(token %2) ] ; CHECK-NEXT: invoke void @__cxa_end_catch() [ "funclet"(token %2) ] ; CHECK-NEXT: to label %.noexc unwind label %catch.dispatch.longjmp diff --git a/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll index 82c04e2..e1cb859 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll @@ -25,26 +25,24 @@ entry: unreachable ; CHECK: entry: +; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16 ; CHECK-NEXT: %functionInvocationId = alloca i32, align 4 ; CHECK-NEXT: br label %setjmp.dispatch ; CHECK: setjmp.dispatch: ; CHECK-NEXT: %[[VAL2:.*]] = phi i32 [ %val, %if.end ], [ undef, %entry ] -; CHECK-NEXT: %[[BUF:.*]] = phi ptr [ %[[BUF2:.*]], %if.end ], [ undef, %entry ] ; CHECK-NEXT: %label.phi = phi i32 [ %label, %if.end ], [ -1, %entry ] ; CHECK-NEXT: switch i32 %label.phi, label %entry.split [ ; CHECK-NEXT: i32 1, label %entry.split.split ; CHECK-NEXT: ] ; CHECK: entry.split: -; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16 ; CHECK-NEXT: call void @__wasm_setjmp(ptr %buf, i32 1, ptr %functionInvocationId) ; CHECK-NEXT: br label %entry.split.split ; CHECK: entry.split.split: -; CHECK-NEXT: %[[BUF2]] = phi ptr [ %[[BUF]], %setjmp.dispatch ], [ %buf, %entry.split ] ; CHECK-NEXT: %setjmp.ret = phi i32 [ 0, %entry.split ], [ %[[VAL2]], %setjmp.dispatch ] -; CHECK-NEXT: invoke void @__wasm_longjmp(ptr %[[BUF2]], i32 1) +; CHECK-NEXT: invoke void @__wasm_longjmp(ptr %buf, i32 1) ; CHECK-NEXT: to label %.noexc unwind label %catch.dispatch.longjmp ; CHECK: .noexc: diff --git a/llvm/test/CodeGen/WebAssembly/ref-test-func.ll b/llvm/test/CodeGen/WebAssembly/ref-test-func.ll index e4014ba..ea2453f 100644 --- a/llvm/test/CodeGen/WebAssembly/ref-test-func.ll +++ b/llvm/test/CodeGen/WebAssembly/ref-test-func.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s --mtriple=wasm32-unknown-unknown -mcpu=mvp -mattr=+reference-types -verify-machineinstrs | FileCheck --check-prefixes CHECK,CHK32 %s -; RUN: llc < %s --mtriple=wasm64-unknown-unknown -mcpu=mvp -mattr=+reference-types -verify-machineinstrs | FileCheck --check-prefixes CHECK,CHK64 %s +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -mcpu=mvp -mattr=+reference-types -mattr=+gc -verify-machineinstrs | FileCheck --check-prefixes CHECK,CHK32 %s +; RUN: llc < %s --mtriple=wasm64-unknown-unknown -mcpu=mvp -mattr=+reference-types -mattr=+gc -verify-machineinstrs | FileCheck --check-prefixes CHECK,CHK64 %s define void @test_fpsig_void_void(ptr noundef %func) local_unnamed_addr #0 { ; CHECK-LABEL: test_fpsig_void_void: diff --git a/llvm/test/CodeGen/WebAssembly/returned.ll b/llvm/test/CodeGen/WebAssembly/returned.ll index e767e29..aef75d8 100644 --- a/llvm/test/CodeGen/WebAssembly/returned.ll +++ b/llvm/test/CodeGen/WebAssembly/returned.ll @@ -80,3 +80,27 @@ define i32 @test_second_arg(i32 %a, i32 %b) { %call = call i32 @do_something_else(i32 %a, i32 %b) ret i32 %b } + +define void @test() { +; CHECK-LABEL: test: +; CHECK: .functype test () -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get $push0=, __stack_pointer +; CHECK-NEXT: i32.const $push1=, 16 +; CHECK-NEXT: i32.sub $push7=, $pop0, $pop1 +; CHECK-NEXT: local.tee $push6=, $0=, $pop7 +; CHECK-NEXT: global.set __stack_pointer, $pop6 +; CHECK-NEXT: i32.const $push4=, 12 +; CHECK-NEXT: i32.add $push5=, $0, $pop4 +; CHECK-NEXT: call $drop=, returns_arg, $pop5 +; CHECK-NEXT: i32.const $push2=, 16 +; CHECK-NEXT: i32.add $push3=, $0, $pop2 +; CHECK-NEXT: global.set __stack_pointer, $pop3 +; CHECK-NEXT: return +entry: + %a = alloca i32 + call void @llvm.lifetime.start.p0(i64 4, ptr %a) + %ret = call ptr @returns_arg(ptr %a) + call void @llvm.lifetime.end.p0(i64 4, ptr %a) + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll index e3607e1..36637e1 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -199,139 +199,17 @@ define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) { ; SIMD128-LABEL: mul_v16i8: ; SIMD128: .functype mul_v16i8 (v128, v128) -> (v128) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: i8x16.extract_lane_u $push4=, $0, 0 -; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $1, 0 -; SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; SIMD128-NEXT: i8x16.splat $push6=, $pop5 -; SIMD128-NEXT: i8x16.extract_lane_u $push1=, $0, 1 -; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $1, 1 -; SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; SIMD128-NEXT: i8x16.replace_lane $push7=, $pop6, 1, $pop2 -; SIMD128-NEXT: i8x16.extract_lane_u $push9=, $0, 2 -; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $1, 2 -; SIMD128-NEXT: i32.mul $push10=, $pop9, $pop8 -; SIMD128-NEXT: i8x16.replace_lane $push11=, $pop7, 2, $pop10 -; SIMD128-NEXT: i8x16.extract_lane_u $push13=, $0, 3 -; SIMD128-NEXT: i8x16.extract_lane_u $push12=, $1, 3 -; SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12 -; SIMD128-NEXT: i8x16.replace_lane $push15=, $pop11, 3, $pop14 -; SIMD128-NEXT: i8x16.extract_lane_u $push17=, $0, 4 -; SIMD128-NEXT: i8x16.extract_lane_u $push16=, $1, 4 -; SIMD128-NEXT: i32.mul $push18=, $pop17, $pop16 -; SIMD128-NEXT: i8x16.replace_lane $push19=, $pop15, 4, $pop18 -; SIMD128-NEXT: i8x16.extract_lane_u $push21=, $0, 5 -; SIMD128-NEXT: i8x16.extract_lane_u $push20=, $1, 5 -; SIMD128-NEXT: i32.mul $push22=, $pop21, $pop20 -; SIMD128-NEXT: i8x16.replace_lane $push23=, $pop19, 5, $pop22 -; SIMD128-NEXT: i8x16.extract_lane_u $push25=, $0, 6 -; SIMD128-NEXT: i8x16.extract_lane_u $push24=, $1, 6 -; SIMD128-NEXT: i32.mul $push26=, $pop25, $pop24 -; SIMD128-NEXT: i8x16.replace_lane $push27=, $pop23, 6, $pop26 -; SIMD128-NEXT: i8x16.extract_lane_u $push29=, $0, 7 -; SIMD128-NEXT: i8x16.extract_lane_u $push28=, $1, 7 -; SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28 -; SIMD128-NEXT: i8x16.replace_lane $push31=, $pop27, 7, $pop30 -; SIMD128-NEXT: i8x16.extract_lane_u $push33=, $0, 8 -; SIMD128-NEXT: i8x16.extract_lane_u $push32=, $1, 8 -; SIMD128-NEXT: i32.mul $push34=, $pop33, $pop32 -; SIMD128-NEXT: i8x16.replace_lane $push35=, $pop31, 8, $pop34 -; SIMD128-NEXT: i8x16.extract_lane_u $push37=, $0, 9 -; SIMD128-NEXT: i8x16.extract_lane_u $push36=, $1, 9 -; SIMD128-NEXT: i32.mul $push38=, $pop37, $pop36 -; SIMD128-NEXT: i8x16.replace_lane $push39=, $pop35, 9, $pop38 -; SIMD128-NEXT: i8x16.extract_lane_u $push41=, $0, 10 -; SIMD128-NEXT: i8x16.extract_lane_u $push40=, $1, 10 -; SIMD128-NEXT: i32.mul $push42=, $pop41, $pop40 -; SIMD128-NEXT: i8x16.replace_lane $push43=, $pop39, 10, $pop42 -; SIMD128-NEXT: i8x16.extract_lane_u $push45=, $0, 11 -; SIMD128-NEXT: i8x16.extract_lane_u $push44=, $1, 11 -; SIMD128-NEXT: i32.mul $push46=, $pop45, $pop44 -; SIMD128-NEXT: i8x16.replace_lane $push47=, $pop43, 11, $pop46 -; SIMD128-NEXT: i8x16.extract_lane_u $push49=, $0, 12 -; SIMD128-NEXT: i8x16.extract_lane_u $push48=, $1, 12 -; SIMD128-NEXT: i32.mul $push50=, $pop49, $pop48 -; SIMD128-NEXT: i8x16.replace_lane $push51=, $pop47, 12, $pop50 -; SIMD128-NEXT: i8x16.extract_lane_u $push53=, $0, 13 -; SIMD128-NEXT: i8x16.extract_lane_u $push52=, $1, 13 -; SIMD128-NEXT: i32.mul $push54=, $pop53, $pop52 -; SIMD128-NEXT: i8x16.replace_lane $push55=, $pop51, 13, $pop54 -; SIMD128-NEXT: i8x16.extract_lane_u $push57=, $0, 14 -; SIMD128-NEXT: i8x16.extract_lane_u $push56=, $1, 14 -; SIMD128-NEXT: i32.mul $push58=, $pop57, $pop56 -; SIMD128-NEXT: i8x16.replace_lane $push59=, $pop55, 14, $pop58 -; SIMD128-NEXT: i8x16.extract_lane_u $push61=, $0, 15 -; SIMD128-NEXT: i8x16.extract_lane_u $push60=, $1, 15 -; SIMD128-NEXT: i32.mul $push62=, $pop61, $pop60 -; SIMD128-NEXT: i8x16.replace_lane $push63=, $pop59, 15, $pop62 -; SIMD128-NEXT: return $pop63 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push1=, $0, $1 +; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push0=, $0, $1 +; SIMD128-NEXT: i8x16.shuffle $push2=, $pop1, $pop0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; SIMD128-NEXT: return $pop2 ; ; SIMD128-FAST-LABEL: mul_v16i8: ; SIMD128-FAST: .functype mul_v16i8 (v128, v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push5=, $0, 0 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push4=, $1, 0 -; SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 -; SIMD128-FAST-NEXT: i8x16.splat $push7=, $pop6 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push2=, $0, 1 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push1=, $1, 1 -; SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push8=, $pop7, 1, $pop3 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push10=, $0, 2 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push9=, $1, 2 -; SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push12=, $pop8, 2, $pop11 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push14=, $0, 3 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push13=, $1, 3 -; SIMD128-FAST-NEXT: i32.mul $push15=, $pop14, $pop13 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push16=, $pop12, 3, $pop15 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push18=, $0, 4 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push17=, $1, 4 -; SIMD128-FAST-NEXT: i32.mul $push19=, $pop18, $pop17 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push20=, $pop16, 4, $pop19 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push22=, $0, 5 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push21=, $1, 5 -; SIMD128-FAST-NEXT: i32.mul $push23=, $pop22, $pop21 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push24=, $pop20, 5, $pop23 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push26=, $0, 6 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push25=, $1, 6 -; SIMD128-FAST-NEXT: i32.mul $push27=, $pop26, $pop25 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push28=, $pop24, 6, $pop27 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push30=, $0, 7 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push29=, $1, 7 -; SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push32=, $pop28, 7, $pop31 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push34=, $0, 8 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push33=, $1, 8 -; SIMD128-FAST-NEXT: i32.mul $push35=, $pop34, $pop33 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push36=, $pop32, 8, $pop35 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push38=, $0, 9 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push37=, $1, 9 -; SIMD128-FAST-NEXT: i32.mul $push39=, $pop38, $pop37 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push40=, $pop36, 9, $pop39 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push42=, $0, 10 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push41=, $1, 10 -; SIMD128-FAST-NEXT: i32.mul $push43=, $pop42, $pop41 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push44=, $pop40, 10, $pop43 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push46=, $0, 11 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push45=, $1, 11 -; SIMD128-FAST-NEXT: i32.mul $push47=, $pop46, $pop45 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push48=, $pop44, 11, $pop47 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push50=, $0, 12 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push49=, $1, 12 -; SIMD128-FAST-NEXT: i32.mul $push51=, $pop50, $pop49 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push52=, $pop48, 12, $pop51 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push54=, $0, 13 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push53=, $1, 13 -; SIMD128-FAST-NEXT: i32.mul $push55=, $pop54, $pop53 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push56=, $pop52, 13, $pop55 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push58=, $0, 14 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push57=, $1, 14 -; SIMD128-FAST-NEXT: i32.mul $push59=, $pop58, $pop57 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push60=, $pop56, 14, $pop59 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push62=, $0, 15 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push61=, $1, 15 -; SIMD128-FAST-NEXT: i32.mul $push63=, $pop62, $pop61 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push0=, $pop60, 15, $pop63 +; SIMD128-FAST-NEXT: i16x8.extmul_low_i8x16_u $push2=, $0, $1 +; SIMD128-FAST-NEXT: i16x8.extmul_high_i8x16_u $push1=, $0, $1 +; SIMD128-FAST-NEXT: i8x16.shuffle $push0=, $pop2, $pop1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 ; SIMD128-FAST-NEXT: return $pop0 ; ; NO-SIMD128-LABEL: mul_v16i8: diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll new file mode 100644 index 0000000..6e2d860 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll @@ -0,0 +1,145 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s --check-prefix=RELAXED +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128, | FileCheck %s --check-prefix=STRICT + +target triple = "wasm32" + +define double @fsub_fmul_contract_f64(double %a, double %b, double %c) { +; RELAXED-LABEL: fsub_fmul_contract_f64: +; RELAXED: .functype fsub_fmul_contract_f64 (f64, f64, f64) -> (f64) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64.mul $push0=, $1, $0 +; RELAXED-NEXT: f64.sub $push1=, $2, $pop0 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fsub_fmul_contract_f64: +; STRICT: .functype fsub_fmul_contract_f64 (f64, f64, f64) -> (f64) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64.mul $push0=, $1, $0 +; STRICT-NEXT: f64.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract double %b, %a + %sub = fsub contract double %c, %mul + ret double %sub +} + +define <4 x float> @fsub_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; RELAXED-LABEL: fsub_fmul_contract_4xf32: +; RELAXED: .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fsub_fmul_contract_4xf32: +; STRICT: .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32x4.mul $push0=, $1, $0 +; STRICT-NEXT: f32x4.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract <4 x float> %b, %a + %sub = fsub contract <4 x float> %c, %mul + ret <4 x float> %sub +} + + +define <8 x half> @fsub_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; RELAXED-LABEL: fsub_fmul_contract_8xf16: +; RELAXED: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fsub_fmul_contract_8xf16: +; STRICT: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f16x8.mul $push0=, $1, $0 +; STRICT-NEXT: f16x8.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract <8 x half> %b, %a + %sub = fsub contract <8 x half> %c, %mul + ret <8 x half> %sub +} + + +define <4 x float> @fsub_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; RELAXED-LABEL: fsub_fmul_4xf32: +; RELAXED: .functype fsub_fmul_4xf32 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32x4.mul $push0=, $1, $0 +; RELAXED-NEXT: f32x4.sub $push1=, $2, $pop0 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fsub_fmul_4xf32: +; STRICT: .functype fsub_fmul_4xf32 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32x4.mul $push0=, $1, $0 +; STRICT-NEXT: f32x4.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul <4 x float> %b, %a + %sub = fsub contract <4 x float> %c, %mul + ret <4 x float> %sub +} + +define <8 x float> @fsub_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8 x float> %c) { +; RELAXED-LABEL: fsub_fmul_contract_8xf32: +; RELAXED: .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $6, $4, $2 +; RELAXED-NEXT: v128.store 16($0), $pop0 +; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $5, $3, $1 +; RELAXED-NEXT: v128.store 0($0), $pop1 +; RELAXED-NEXT: return +; +; STRICT-LABEL: fsub_fmul_contract_8xf32: +; STRICT: .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32x4.mul $push0=, $4, $2 +; STRICT-NEXT: f32x4.sub $push1=, $6, $pop0 +; STRICT-NEXT: v128.store 16($0), $pop1 +; STRICT-NEXT: f32x4.mul $push2=, $3, $1 +; STRICT-NEXT: f32x4.sub $push3=, $5, $pop2 +; STRICT-NEXT: v128.store 0($0), $pop3 +; STRICT-NEXT: return + %mul = fmul contract <8 x float> %b, %a + %sub = fsub contract <8 x float> %c, %mul + ret <8 x float> %sub +} + + +define <2 x double> @fsub_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; RELAXED-LABEL: fsub_fmul_contract_2xf64: +; RELAXED: .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fsub_fmul_contract_2xf64: +; STRICT: .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64x2.mul $push0=, $1, $0 +; STRICT-NEXT: f64x2.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract <2 x double> %b, %a + %sub = fsub contract <2 x double> %c, %mul + ret <2 x double> %sub +} + +define float @fsub_fmul_contract_f32(float %a, float %b, float %c) { +; RELAXED-LABEL: fsub_fmul_contract_f32: +; RELAXED: .functype fsub_fmul_contract_f32 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32.mul $push0=, $1, $0 +; RELAXED-NEXT: f32.sub $push1=, $2, $pop0 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fsub_fmul_contract_f32: +; STRICT: .functype fsub_fmul_contract_f32 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32.mul $push0=, $1, $0 +; STRICT-NEXT: f32.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract float %b, %a + %sub = fsub contract float %c, %mul + ret float %sub +} + diff --git a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll index 1c77ad5..60cfc27 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll @@ -70,9 +70,9 @@ target triple = "wasm32-unknown-unknown" ; +call-indirect-overlong, +exception-handling, ; +extended-const, +fp16, +multimemory, +multivalue, ; +mutable-globals, +nontrapping-fptoint, +relaxed-simd, -; +reference-types, +simd128, +sign-ext, +tail-call +; +reference-types, +simd128, +sign-ext, +tail-call, +gc ; BLEEDING-EDGE-LABEL: .section .custom_section.target_features,"",@ -; BLEEDING-EDGE-NEXT: .int8 16 +; BLEEDING-EDGE-NEXT: .int8 17 ; BLEEDING-EDGE-NEXT: .int8 43 ; BLEEDING-EDGE-NEXT: .int8 7 ; BLEEDING-EDGE-NEXT: .ascii "atomics" @@ -95,6 +95,9 @@ target triple = "wasm32-unknown-unknown" ; BLEEDING-EDGE-NEXT: .int8 4 ; BLEEDING-EDGE-NEXT: .ascii "fp16" ; BLEEDING-EDGE-NEXT: .int8 43 +; BLEEDING-EDGE-NEXT: .int8 2 +; BLEEDING-EDGE-NEXT: .ascii "gc" +; BLEEDING-EDGE-NEXT: .int8 43 ; BLEEDING-EDGE-NEXT: .int8 11 ; BLEEDING-EDGE-NEXT: .ascii "multimemory" ; BLEEDING-EDGE-NEXT: .int8 43 diff --git a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll index 1d194b6..4c30a3a 100644 --- a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll +++ b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll @@ -116,40 +116,28 @@ define i8 @pairwise_mul_v16i8(<16 x i8> %arg) { ; SIMD128-LABEL: pairwise_mul_v16i8: ; SIMD128: .functype pairwise_mul_v16i8 (v128) -> (i32) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: i8x16.extract_lane_u $push26=, $0, 0 -; SIMD128-NEXT: i8x16.shuffle $push32=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 -; SIMD128-NEXT: local.tee $push31=, $1=, $pop32 -; SIMD128-NEXT: i8x16.extract_lane_u $push25=, $pop31, 0 -; SIMD128-NEXT: i32.mul $push27=, $pop26, $pop25 -; SIMD128-NEXT: i8x16.extract_lane_u $push23=, $0, 4 -; SIMD128-NEXT: i8x16.extract_lane_u $push22=, $1, 4 -; SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22 -; SIMD128-NEXT: i32.mul $push28=, $pop27, $pop24 -; SIMD128-NEXT: i8x16.extract_lane_u $push19=, $0, 2 -; SIMD128-NEXT: i8x16.extract_lane_u $push18=, $1, 2 -; SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18 -; SIMD128-NEXT: i8x16.extract_lane_u $push16=, $0, 6 -; SIMD128-NEXT: i8x16.extract_lane_u $push15=, $1, 6 -; SIMD128-NEXT: i32.mul $push17=, $pop16, $pop15 -; SIMD128-NEXT: i32.mul $push21=, $pop20, $pop17 -; SIMD128-NEXT: i32.mul $push29=, $pop28, $pop21 -; SIMD128-NEXT: i8x16.extract_lane_u $push11=, $0, 1 -; SIMD128-NEXT: i8x16.extract_lane_u $push10=, $1, 1 -; SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 -; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $0, 5 -; SIMD128-NEXT: i8x16.extract_lane_u $push7=, $1, 5 -; SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 -; SIMD128-NEXT: i32.mul $push13=, $pop12, $pop9 -; SIMD128-NEXT: i8x16.extract_lane_u $push4=, $0, 3 -; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $1, 3 -; SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; SIMD128-NEXT: i8x16.extract_lane_u $push1=, $0, 7 -; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $1, 7 -; SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; SIMD128-NEXT: i32.mul $push6=, $pop5, $pop2 -; SIMD128-NEXT: i32.mul $push14=, $pop13, $pop6 -; SIMD128-NEXT: i32.mul $push30=, $pop29, $pop14 -; SIMD128-NEXT: return $pop30 +; SIMD128-NEXT: i8x16.shuffle $push20=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 +; SIMD128-NEXT: local.tee $push19=, $1=, $pop20 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push1=, $0, $pop19 +; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push0=, $0, $1 +; SIMD128-NEXT: i8x16.shuffle $push18=, $pop1, $pop0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; SIMD128-NEXT: local.tee $push17=, $0=, $pop18 +; SIMD128-NEXT: i8x16.shuffle $push16=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; SIMD128-NEXT: local.tee $push15=, $1=, $pop16 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push3=, $pop17, $pop15 +; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push2=, $0, $1 +; SIMD128-NEXT: i8x16.shuffle $push14=, $pop3, $pop2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; SIMD128-NEXT: local.tee $push13=, $0=, $pop14 +; SIMD128-NEXT: i8x16.shuffle $push12=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; SIMD128-NEXT: local.tee $push11=, $1=, $pop12 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push5=, $pop13, $pop11 +; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push4=, $0, $1 +; SIMD128-NEXT: i8x16.shuffle $push10=, $pop5, $pop4, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; SIMD128-NEXT: local.tee $push9=, $0=, $pop10 +; SIMD128-NEXT: i8x16.shuffle $push6=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push7=, $pop9, $pop6 +; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $pop7, 0 +; SIMD128-NEXT: return $pop8 %res = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %arg) ret i8 %res } |