diff options
Diffstat (limited to 'llvm/test/CodeGen/WebAssembly')
18 files changed, 2237 insertions, 180 deletions
diff --git a/llvm/test/CodeGen/WebAssembly/libcall_vectorized.ll b/llvm/test/CodeGen/WebAssembly/libcall_vectorized.ll new file mode 100644 index 0000000..2d1056f --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/libcall_vectorized.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 + +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-keep-registers -mattr=+simd128 | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +declare <4 x float> @llvm.exp10.v4f32(<4 x float>) + +define <4 x float> @exp10_f32v4(<4 x float> %v) { +; CHECK-LABEL: exp10_f32v4: +; CHECK: .functype exp10_f32v4 (v128) -> (v128) +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get $push12=, 0 +; CHECK-NEXT: f32x4.extract_lane $push0=, $pop12, 0 +; CHECK-NEXT: call $push1=, exp10f, $pop0 +; CHECK-NEXT: f32x4.splat $push2=, $pop1 +; CHECK-NEXT: local.get $push13=, 0 +; CHECK-NEXT: f32x4.extract_lane $push3=, $pop13, 1 +; CHECK-NEXT: call $push4=, exp10f, $pop3 +; CHECK-NEXT: f32x4.replace_lane $push5=, $pop2, 1, $pop4 +; CHECK-NEXT: local.get $push14=, 0 +; CHECK-NEXT: f32x4.extract_lane $push6=, $pop14, 2 +; CHECK-NEXT: call $push7=, exp10f, $pop6 +; CHECK-NEXT: f32x4.replace_lane $push8=, $pop5, 2, $pop7 +; CHECK-NEXT: local.get $push15=, 0 +; CHECK-NEXT: f32x4.extract_lane $push9=, $pop15, 3 +; CHECK-NEXT: call $push10=, exp10f, $pop9 +; CHECK-NEXT: f32x4.replace_lane $push11=, $pop8, 3, $pop10 +; CHECK-NEXT: return $pop11 +entry: + %r = call <4 x float> @llvm.exp10.v4f32(<4 x float> %v) + ret <4 x float> %r +} diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll new file mode 100644 index 0000000..0f968de --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll @@ -0,0 +1,129 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -wasm-lower-em-ehsjlj -wasm-enable-sjlj -mtriple=wasm32-unknown-emscripten < %s | FileCheck %s + +@buf = external global i8 +declare i32 @setjmp(ptr) returns_twice +declare void @dummy() + +define void @test_static() { +; CHECK-LABEL: define void @test_static() personality ptr @__gxx_wasm_personality_v0 { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[X:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[FUNCTIONINVOCATIONID:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label %[[SETJMP_DISPATCH:.*]] +; CHECK: [[SETJMP_DISPATCH]]: +; CHECK-NEXT: [[VAL1:%.*]] = phi i32 [ [[VAL:%.*]], %[[IF_END:.*]] ], [ undef, %[[ENTRY]] ] +; CHECK-NEXT: [[LABEL_PHI:%.*]] = phi i32 [ [[LABEL:%.*]], %[[IF_END]] ], [ -1, %[[ENTRY]] ] +; CHECK-NEXT: switch i32 [[LABEL_PHI]], label %[[ENTRY_SPLIT:.*]] [ +; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_SPLIT:.*]] +; CHECK-NEXT: ] +; CHECK: [[ENTRY_SPLIT]]: +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[X]]) +; CHECK-NEXT: call void @__wasm_setjmp(ptr @buf, i32 1, ptr [[FUNCTIONINVOCATIONID]]) +; CHECK-NEXT: br label %[[ENTRY_SPLIT_SPLIT]] +; CHECK: [[ENTRY_SPLIT_SPLIT]]: +; CHECK-NEXT: [[SETJMP_RET:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[VAL1]], %[[SETJMP_DISPATCH]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SETJMP_RET]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: invoke void @dummy() +; CHECK-NEXT: to [[DOTNOEXC:label %.*]] unwind label %[[CATCH_DISPATCH_LONGJMP:.*]] +; CHECK: [[_NOEXC:.*:]] +; CHECK-NEXT: ret void +; CHECK: [[ELSE]]: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[X]]) +; CHECK-NEXT: ret void +; CHECK: [[CATCH_DISPATCH_LONGJMP]]: +; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch.longjmp] unwind to caller +; CHECK: [[CATCH_LONGJMP:.*:]] +; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [] +; CHECK-NEXT: [[THROWN:%.*]] = call ptr @llvm.wasm.catch(i32 1) +; CHECK-NEXT: [[ENV_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 0 +; CHECK-NEXT: [[VAL_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 1 +; CHECK-NEXT: [[ENV:%.*]] = load ptr, ptr [[ENV_GEP]], align 4 +; CHECK-NEXT: [[VAL]] = load i32, ptr [[VAL_GEP]], align 4 +; CHECK-NEXT: [[LABEL]] = call i32 @__wasm_setjmp_test(ptr [[ENV]], ptr [[FUNCTIONINVOCATIONID]]) [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LABEL]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[IF_THEN:.*]], label %[[IF_END]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: call void @__wasm_longjmp(ptr [[ENV]], i32 [[VAL]]) [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: unreachable +; CHECK: [[IF_END]]: +; CHECK-NEXT: catchret from [[TMP1]] to label %[[SETJMP_DISPATCH]] +; +entry: + %x = alloca i32, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr %x) + %call = call i32 @setjmp(ptr @buf) returns_twice + %cmp = icmp eq i32 %call, 0 + br i1 %cmp, label %if, label %else + +if: + call void @dummy() + ret void + +else: + call void @llvm.lifetime.end.p0(i64 4, ptr %x) + ret void +} + +define void @test_dynamic(i32 %size) { +; CHECK-LABEL: define void @test_dynamic( +; CHECK-SAME: i32 [[SIZE:%.*]]) personality ptr @__gxx_wasm_personality_v0 { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[FUNCTIONINVOCATIONID:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label %[[SETJMP_DISPATCH:.*]] +; CHECK: [[SETJMP_DISPATCH]]: +; CHECK-NEXT: [[VAL1:%.*]] = phi i32 [ [[VAL:%.*]], %[[IF_END:.*]] ], [ undef, %[[ENTRY]] ] +; CHECK-NEXT: [[LABEL_PHI:%.*]] = phi i32 [ [[LABEL:%.*]], %[[IF_END]] ], [ -1, %[[ENTRY]] ] +; CHECK-NEXT: switch i32 [[LABEL_PHI]], label %[[ENTRY_SPLIT:.*]] [ +; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_SPLIT:.*]] +; CHECK-NEXT: ] +; CHECK: [[ENTRY_SPLIT]]: +; CHECK-NEXT: [[X:%.*]] = alloca i32, i32 [[SIZE]], align 4 +; CHECK-NEXT: call void @__wasm_setjmp(ptr @buf, i32 1, ptr [[FUNCTIONINVOCATIONID]]) +; CHECK-NEXT: br label %[[ENTRY_SPLIT_SPLIT]] +; CHECK: [[ENTRY_SPLIT_SPLIT]]: +; CHECK-NEXT: [[SETJMP_RET:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[VAL1]], %[[SETJMP_DISPATCH]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SETJMP_RET]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: invoke void @dummy() +; CHECK-NEXT: to [[DOTNOEXC:label %.*]] unwind label %[[CATCH_DISPATCH_LONGJMP:.*]] +; CHECK: [[_NOEXC:.*:]] +; CHECK-NEXT: ret void +; CHECK: [[ELSE]]: +; CHECK-NEXT: ret void +; CHECK: [[CATCH_DISPATCH_LONGJMP]]: +; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch.longjmp] unwind to caller +; CHECK: [[CATCH_LONGJMP:.*:]] +; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [] +; CHECK-NEXT: [[THROWN:%.*]] = call ptr @llvm.wasm.catch(i32 1) +; CHECK-NEXT: [[ENV_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 0 +; CHECK-NEXT: [[VAL_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 1 +; CHECK-NEXT: [[ENV:%.*]] = load ptr, ptr [[ENV_GEP]], align 4 +; CHECK-NEXT: [[VAL]] = load i32, ptr [[VAL_GEP]], align 4 +; CHECK-NEXT: [[LABEL]] = call i32 @__wasm_setjmp_test(ptr [[ENV]], ptr [[FUNCTIONINVOCATIONID]]) [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LABEL]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[IF_THEN:.*]], label %[[IF_END]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: call void @__wasm_longjmp(ptr [[ENV]], i32 [[VAL]]) [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: unreachable +; CHECK: [[IF_END]]: +; CHECK-NEXT: catchret from [[TMP1]] to label %[[SETJMP_DISPATCH]] +; +entry: + %x = alloca i32, i32 %size, align 4 + call void @llvm.lifetime.start.p0(i64 -1, ptr %x) + %call = call i32 @setjmp(ptr @buf) returns_twice + %cmp = icmp eq i32 %call, 0 + br i1 %cmp, label %if, label %else + +if: + call void @dummy() + ret void + +else: + call void @llvm.lifetime.end.p0(i64 -1, ptr %x) + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll index fec9836..bab8403 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll @@ -16,10 +16,10 @@ entry: call void @foo(), !dbg !7 ret void, !dbg !8 ; CHECK: entry: - ; CHECK-NEXT: %functionInvocationId = alloca i32, align 4, !dbg ![[DL0:.*]] + ; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16, !dbg ![[DL0:.*]] + ; CHECK-NEXT: %functionInvocationId = alloca i32, align 4, !dbg ![[DL0]] ; CHECK: entry.split: - ; CHECK: alloca {{.*}}, !dbg ![[DL0]] ; CHECK: call void @__wasm_setjmp{{.*}}, !dbg ![[DL1:.*]] ; CHECK-NEXT: br {{.*}}, !dbg ![[DL2:.*]] diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll index b584342..51dcf2f 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll @@ -22,17 +22,17 @@ entry: call void @longjmp(ptr %buf, i32 1) #1 unreachable ; CHECK: entry: +; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16 ; CHECK-NEXT: %functionInvocationId = alloca i32, align 4 ; CHECK-NEXT: br label %entry.split ; CHECK: entry.split -; CHECK-NEXT: %[[BUF:.*]] = alloca [1 x %struct.__jmp_buf_tag] -; CHECK-NEXT: call void @__wasm_setjmp(ptr %[[BUF]], i32 1, ptr %functionInvocationId) +; CHECK-NEXT: call void @__wasm_setjmp(ptr %buf, i32 1, ptr %functionInvocationId) ; CHECK-NEXT: br label %entry.split.split ; CHECK: entry.split.split: ; CHECK-NEXT: phi i32 [ 0, %entry.split ], [ %[[LONGJMP_RESULT:.*]], %if.end ] -; CHECK-NEXT: %[[JMPBUF:.*]] = ptrtoint ptr %[[BUF]] to [[PTR]] +; CHECK-NEXT: %[[JMPBUF:.*]] = ptrtoint ptr %buf to [[PTR]] ; CHECK-NEXT: store [[PTR]] 0, ptr @__THREW__ ; CHECK-NEXT: call cc{{.*}} void @__invoke_void_[[PTR]]_i32(ptr @emscripten_longjmp, [[PTR]] %[[JMPBUF]], i32 1) ; CHECK-NEXT: %[[__THREW__VAL:.*]] = load [[PTR]], ptr @__THREW__ diff --git a/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll index b4c93c4..9de6652 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll @@ -108,7 +108,7 @@ catch: ; preds = %catch.start call void @__cxa_end_catch() [ "funclet"(token %2) ] catchret from %2 to label %catchret.dest ; CHECK: catch: ; preds = %catch.start -; CHECK-NEXT: %exn = load ptr, ptr %exn.slot6, align 4 +; CHECK-NEXT: %exn = load ptr, ptr %exn.slot, align 4 ; CHECK-NEXT: %5 = call ptr @__cxa_begin_catch(ptr %exn) #3 [ "funclet"(token %2) ] ; CHECK-NEXT: invoke void @__cxa_end_catch() [ "funclet"(token %2) ] ; CHECK-NEXT: to label %.noexc unwind label %catch.dispatch.longjmp diff --git a/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll index 82c04e2..e1cb859 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll @@ -25,26 +25,24 @@ entry: unreachable ; CHECK: entry: +; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16 ; CHECK-NEXT: %functionInvocationId = alloca i32, align 4 ; CHECK-NEXT: br label %setjmp.dispatch ; CHECK: setjmp.dispatch: ; CHECK-NEXT: %[[VAL2:.*]] = phi i32 [ %val, %if.end ], [ undef, %entry ] -; CHECK-NEXT: %[[BUF:.*]] = phi ptr [ %[[BUF2:.*]], %if.end ], [ undef, %entry ] ; CHECK-NEXT: %label.phi = phi i32 [ %label, %if.end ], [ -1, %entry ] ; CHECK-NEXT: switch i32 %label.phi, label %entry.split [ ; CHECK-NEXT: i32 1, label %entry.split.split ; CHECK-NEXT: ] ; CHECK: entry.split: -; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16 ; CHECK-NEXT: call void @__wasm_setjmp(ptr %buf, i32 1, ptr %functionInvocationId) ; CHECK-NEXT: br label %entry.split.split ; CHECK: entry.split.split: -; CHECK-NEXT: %[[BUF2]] = phi ptr [ %[[BUF]], %setjmp.dispatch ], [ %buf, %entry.split ] ; CHECK-NEXT: %setjmp.ret = phi i32 [ 0, %entry.split ], [ %[[VAL2]], %setjmp.dispatch ] -; CHECK-NEXT: invoke void @__wasm_longjmp(ptr %[[BUF2]], i32 1) +; CHECK-NEXT: invoke void @__wasm_longjmp(ptr %buf, i32 1) ; CHECK-NEXT: to label %.noexc unwind label %catch.dispatch.longjmp ; CHECK: .noexc: diff --git a/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll new file mode 100644 index 0000000..8030438 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/memcmp-expand.ll @@ -0,0 +1,151 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +declare i32 @memcmp(ptr, ptr, i32) + +define i1 @memcmp_expand_3(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_3: +; CHECK: .functype memcmp_expand_3 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load16_u $push7=, 0($0):p2align=0 +; CHECK-NEXT: i32.load16_u $push6=, 0($1):p2align=0 +; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6 +; CHECK-NEXT: i32.const $push0=, 2 +; CHECK-NEXT: i32.add $push3=, $0, $pop0 +; CHECK-NEXT: i32.load8_u $push4=, 0($pop3) +; CHECK-NEXT: i32.const $push13=, 2 +; CHECK-NEXT: i32.add $push1=, $1, $pop13 +; CHECK-NEXT: i32.load8_u $push2=, 0($pop1) +; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2 +; CHECK-NEXT: i32.or $push9=, $pop8, $pop5 +; CHECK-NEXT: i32.const $push10=, 65535 +; CHECK-NEXT: i32.and $push11=, $pop9, $pop10 +; CHECK-NEXT: i32.eqz $push12=, $pop11 +; CHECK-NEXT: return $pop12 + %cmp_3 = call i32 @memcmp(ptr %a, ptr %b, i32 3) + %res = icmp eq i32 %cmp_3, 0 + ret i1 %res +} + +define i1 @memcmp_expand_5(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_5: +; CHECK: .functype memcmp_expand_5 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load $push7=, 0($0):p2align=0 +; CHECK-NEXT: i32.load $push6=, 0($1):p2align=0 +; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6 +; CHECK-NEXT: i32.const $push0=, 4 +; CHECK-NEXT: i32.add $push3=, $0, $pop0 +; CHECK-NEXT: i32.load8_u $push4=, 0($pop3) +; CHECK-NEXT: i32.const $push11=, 4 +; CHECK-NEXT: i32.add $push1=, $1, $pop11 +; CHECK-NEXT: i32.load8_u $push2=, 0($pop1) +; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2 +; CHECK-NEXT: i32.or $push9=, $pop8, $pop5 +; CHECK-NEXT: i32.eqz $push10=, $pop9 +; CHECK-NEXT: return $pop10 + %cmp_5 = call i32 @memcmp(ptr %a, ptr %b, i32 5) + %res = icmp eq i32 %cmp_5, 0 + ret i1 %res +} + +define i1 @memcmp_expand_7(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_7: +; CHECK: .functype memcmp_expand_7 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load $push7=, 0($0):p2align=0 +; CHECK-NEXT: i32.load $push6=, 0($1):p2align=0 +; CHECK-NEXT: i32.xor $push8=, $pop7, $pop6 +; CHECK-NEXT: i32.const $push0=, 3 +; CHECK-NEXT: i32.add $push3=, $0, $pop0 +; CHECK-NEXT: i32.load $push4=, 0($pop3):p2align=0 +; CHECK-NEXT: i32.const $push11=, 3 +; CHECK-NEXT: i32.add $push1=, $1, $pop11 +; CHECK-NEXT: i32.load $push2=, 0($pop1):p2align=0 +; CHECK-NEXT: i32.xor $push5=, $pop4, $pop2 +; CHECK-NEXT: i32.or $push9=, $pop8, $pop5 +; CHECK-NEXT: i32.eqz $push10=, $pop9 +; CHECK-NEXT: return $pop10 + %cmp_7 = call i32 @memcmp(ptr %a, ptr %b, i32 7) + %res = icmp eq i32 %cmp_7, 0 + ret i1 %res +} + +; INFO: Negative test +; Should not expand even with simd128 +define i1 @memcmp_expand_129(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_129: +; CHECK: .functype memcmp_expand_129 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const $push0=, 129 +; CHECK-NEXT: call $push1=, memcmp, $0, $1, $pop0 +; CHECK-NEXT: i32.eqz $push2=, $pop1 +; CHECK-NEXT: return $pop2 + %cmp_129 = call i32 @memcmp(ptr %a, ptr %b, i32 129) + %res = icmp eq i32 %cmp_129, 0 + ret i1 %res +} + +define i1 @memcmp_expand_2(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_2: +; CHECK: .functype memcmp_expand_2 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load16_u $push1=, 0($0):p2align=0 +; CHECK-NEXT: i32.load16_u $push0=, 0($1):p2align=0 +; CHECK-NEXT: i32.eq $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 + %cmp_2 = call i32 @memcmp(ptr %a, ptr %b, i32 2) + %res = icmp eq i32 %cmp_2, 0 + ret i1 %res +} + +define i1 @memcmp_expand_2_align(ptr align(2) %a, ptr align(2) %b) { +; CHECK-LABEL: memcmp_expand_2_align: +; CHECK: .functype memcmp_expand_2_align (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.load16_u $push1=, 0($0) +; CHECK-NEXT: i32.load16_u $push0=, 0($1) +; CHECK-NEXT: i32.eq $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 + %cmp_2 = call i32 @memcmp(ptr %a, ptr %b, i32 2) + %res = icmp eq i32 %cmp_2, 0 + ret i1 %res +} + +define i1 @memcmp_expand_8(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_8: +; CHECK: .functype memcmp_expand_8 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.load $push1=, 0($0):p2align=0 +; CHECK-NEXT: i64.load $push0=, 0($1):p2align=0 +; CHECK-NEXT: i64.eq $push2=, $pop1, $pop0 +; CHECK-NEXT: return $pop2 + %cmp_8 = call i32 @memcmp(ptr %a, ptr %b, i32 8) + %res = icmp eq i32 %cmp_8, 0 + ret i1 %res +} + +; TODO: Should be using a single load i64x2 or equivalent in bitsizes +define i1 @memcmp_expand_16(ptr %a, ptr %b) { +; CHECK-LABEL: memcmp_expand_16: +; CHECK: .functype memcmp_expand_16 (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.load $push7=, 0($0):p2align=0 +; CHECK-NEXT: i64.load $push6=, 0($1):p2align=0 +; CHECK-NEXT: i64.xor $push8=, $pop7, $pop6 +; CHECK-NEXT: i32.const $push0=, 8 +; CHECK-NEXT: i32.add $push3=, $0, $pop0 +; CHECK-NEXT: i64.load $push4=, 0($pop3):p2align=0 +; CHECK-NEXT: i32.const $push11=, 8 +; CHECK-NEXT: i32.add $push1=, $1, $pop11 +; CHECK-NEXT: i64.load $push2=, 0($pop1):p2align=0 +; CHECK-NEXT: i64.xor $push5=, $pop4, $pop2 +; CHECK-NEXT: i64.or $push9=, $pop8, $pop5 +; CHECK-NEXT: i64.eqz $push10=, $pop9 +; CHECK-NEXT: return $pop10 + %cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16) + %res = icmp eq i32 %cmp_16, 0 + ret i1 %res +} diff --git a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll new file mode 100644 index 0000000..97c2311 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll @@ -0,0 +1,1413 @@ +; RUN: opt -S -mattr=+simd128 -passes=loop-vectorize %s | llc -mtriple=wasm32 -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s + +target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20" + +%struct.TwoInts = type { i32, i32 } +%struct.ThreeInts = type { i32, i32, i32 } +%struct.FourInts = type { i32, i32, i32, i32 } +%struct.ThreeShorts = type { i16, i16, i16 } +%struct.FourShorts = type { i16, i16, i16, i16 } +%struct.FiveShorts = type { i16, i16, i16, i16, i16 } +%struct.TwoBytes = type { i8, i8 } +%struct.ThreeBytes = type { i8, i8, i8 } +%struct.FourBytes = type { i8, i8, i8, i8 } +%struct.EightBytes = type { i8, i8, i8, i8, i8, i8, i8, i8 } + +; CHECK-LABEL: two_ints_same_op: +; CHECK: loop +; CHECK: i32.load +; CHECK: i32.load +; CHECK: i32.add +; CHECK: i32.store +; CHECK: i32.load +; CHECK: i32.load +; CHECK: i32.add +; CHECK: i32.store +define hidden void @two_ints_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %21, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.TwoInts, ptr %1, i32 %8 + %10 = load i32, ptr %9, align 4 + %11 = getelementptr inbounds %struct.TwoInts, ptr %2, i32 %8 + %12 = load i32, ptr %11, align 4 + %13 = add i32 %12, %10 + %14 = getelementptr inbounds %struct.TwoInts, ptr %0, i32 %8 + store i32 %13, ptr %14, align 4 + %15 = getelementptr inbounds i8, ptr %9, i32 4 + %16 = load i32, ptr %15, align 4 + %17 = getelementptr inbounds i8, ptr %11, i32 4 + %18 = load i32, ptr %17, align 4 + %19 = add i32 %18, %16 + %20 = getelementptr inbounds i8, ptr %14, i32 4 + store i32 %19, ptr %20, align 4 + %21 = add nuw i32 %8, 1 + %22 = icmp eq i32 %21, %3 + br i1 %22, label %6, label %7 +} + +; CHECK-LABEL: two_ints_vary_op: +; CHECK: loop +; CHECK: i32.load +; CHECK: i32.load +; CHECK: i32.add +; CHECK: i32.store +; CHECK: i32.load +; CHECK: i32.load +; CHECK: i32.sub +; CHECK: i32.store +define hidden void @two_ints_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %21, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.TwoInts, ptr %1, i32 %8 + %10 = load i32, ptr %9, align 4 + %11 = getelementptr inbounds %struct.TwoInts, ptr %2, i32 %8 + %12 = load i32, ptr %11, align 4 + %13 = add i32 %12, %10 + %14 = getelementptr inbounds %struct.TwoInts, ptr %0, i32 %8 + store i32 %13, ptr %14, align 4 + %15 = getelementptr inbounds i8, ptr %9, i32 4 + %16 = load i32, ptr %15, align 4 + %17 = getelementptr inbounds i8, ptr %11, i32 4 + %18 = load i32, ptr %17, align 4 + %19 = sub i32 %16, %18 + %20 = getelementptr inbounds i8, ptr %14, i32 4 + store i32 %19, ptr %20, align 4 + %21 = add nuw i32 %8, 1 + %22 = icmp eq i32 %21, %3 + br i1 %22, label %6, label %7 +} + +; CHECK-LABEL: three_ints: +; CHECK: loop +; CHECK: i32.load +; CHECK: i32.load +; CHECK: i32.add +; CHECK: i32.store +; CHECK: i32.load +; CHECK: i32.load +; CHECK: i32.add +; CHECK: i32.store +; CHECK: i32.load +; CHECK: i32.load +; CHECK: i32.add +; CHECK: i32.store +define hidden void @three_ints(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %27, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.ThreeInts, ptr %1, i32 %8 + %10 = load i32, ptr %9, align 4 + %11 = getelementptr inbounds %struct.ThreeInts, ptr %2, i32 %8 + %12 = load i32, ptr %11, align 4 + %13 = add nsw i32 %12, %10 + %14 = getelementptr inbounds %struct.ThreeInts, ptr %0, i32 %8 + store i32 %13, ptr %14, align 4 + %15 = getelementptr inbounds i8, ptr %9, i32 4 + %16 = load i32, ptr %15, align 4 + %17 = getelementptr inbounds i8, ptr %11, i32 4 + %18 = load i32, ptr %17, align 4 + %19 = add nsw i32 %18, %16 + %20 = getelementptr inbounds i8, ptr %14, i32 4 + store i32 %19, ptr %20, align 4 + %21 = getelementptr inbounds i8, ptr %9, i32 8 + %22 = load i32, ptr %21, align 4 + %23 = getelementptr inbounds i8, ptr %11, i32 8 + %24 = load i32, ptr %23, align 4 + %25 = add nsw i32 %24, %22 + %26 = getelementptr inbounds i8, ptr %14, i32 8 + store i32 %25, ptr %26, align 4 + %27 = add nuw i32 %8, 1 + %28 = icmp eq i32 %27, %3 + br i1 %28, label %6, label %7 +} + +; CHECK-LABEL: three_shorts: +; CHECK: loop +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.mul +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.mul +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.mul +; CHECK: i32.store16 +define hidden void @three_shorts(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %27, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.ThreeShorts, ptr %1, i32 %8 + %10 = load i16, ptr %9, align 2 + %11 = getelementptr inbounds %struct.ThreeShorts, ptr %2, i32 %8 + %12 = load i16, ptr %11, align 2 + %13 = mul i16 %12, %10 + %14 = getelementptr inbounds %struct.ThreeShorts, ptr %0, i32 %8 + store i16 %13, ptr %14, align 2 + %15 = getelementptr inbounds i8, ptr %9, i32 2 + %16 = load i16, ptr %15, align 2 + %17 = getelementptr inbounds i8, ptr %11, i32 2 + %18 = load i16, ptr %17, align 2 + %19 = mul i16 %18, %16 + %20 = getelementptr inbounds i8, ptr %14, i32 2 + store i16 %19, ptr %20, align 2 + %21 = getelementptr inbounds i8, ptr %9, i32 4 + %22 = load i16, ptr %21, align 2 + %23 = getelementptr inbounds i8, ptr %11, i32 4 + %24 = load i16, ptr %23, align 2 + %25 = mul i16 %24, %22 + %26 = getelementptr inbounds i8, ptr %14, i32 4 + store i16 %25, ptr %26, align 2 + %27 = add nuw i32 %8, 1 + %28 = icmp eq i32 %27, %3 + br i1 %28, label %6, label %7 +} + +; CHECK-LABEL: four_shorts_same_op: +; CHECK: loop +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.sub +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.sub +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.sub +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.sub +; CHECK: i32.store16 +define hidden void @four_shorts_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %33, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.FourShorts, ptr %1, i32 %8 + %10 = load i16, ptr %9, align 2 + %11 = getelementptr inbounds %struct.FourShorts, ptr %2, i32 %8 + %12 = load i16, ptr %11, align 2 + %13 = sub i16 %10, %12 + %14 = getelementptr inbounds %struct.FourShorts, ptr %0, i32 %8 + store i16 %13, ptr %14, align 2 + %15 = getelementptr inbounds i8, ptr %9, i32 2 + %16 = load i16, ptr %15, align 2 + %17 = getelementptr inbounds i8, ptr %11, i32 2 + %18 = load i16, ptr %17, align 2 + %19 = sub i16 %16, %18 + %20 = getelementptr inbounds i8, ptr %14, i32 2 + store i16 %19, ptr %20, align 2 + %21 = getelementptr inbounds i8, ptr %9, i32 4 + %22 = load i16, ptr %21, align 2 + %23 = getelementptr inbounds i8, ptr %11, i32 4 + %24 = load i16, ptr %23, align 2 + %25 = sub i16 %22, %24 + %26 = getelementptr inbounds i8, ptr %14, i32 4 + store i16 %25, ptr %26, align 2 + %27 = getelementptr inbounds i8, ptr %9, i32 6 + %28 = load i16, ptr %27, align 2 + %29 = getelementptr inbounds i8, ptr %11, i32 6 + %30 = load i16, ptr %29, align 2 + %31 = sub i16 %28, %30 + %32 = getelementptr inbounds i8, ptr %14, i32 6 + store i16 %31, ptr %32, align 2 + %33 = add nuw i32 %8, 1 + %34 = icmp eq i32 %33, %3 + br i1 %34, label %6, label %7 +} + +; CHECK-LABEL: four_shorts_split_op: +; CHECK: loop +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.or +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.or +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.xor +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.xor +; CHECK: i32.store16 +define hidden void @four_shorts_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %33, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.FourShorts, ptr %1, i32 %8 + %10 = load i16, ptr %9, align 2 + %11 = getelementptr inbounds %struct.FourShorts, ptr %2, i32 %8 + %12 = load i16, ptr %11, align 2 + %13 = or i16 %12, %10 + %14 = getelementptr inbounds %struct.FourShorts, ptr %0, i32 %8 + store i16 %13, ptr %14, align 2 + %15 = getelementptr inbounds i8, ptr %9, i32 2 + %16 = load i16, ptr %15, align 2 + %17 = getelementptr inbounds i8, ptr %11, i32 2 + %18 = load i16, ptr %17, align 2 + %19 = or i16 %18, %16 + %20 = getelementptr inbounds i8, ptr %14, i32 2 + store i16 %19, ptr %20, align 2 + %21 = getelementptr inbounds i8, ptr %9, i32 4 + %22 = load i16, ptr %21, align 2 + %23 = getelementptr inbounds i8, ptr %11, i32 4 + %24 = load i16, ptr %23, align 2 + %25 = xor i16 %24, %22 + %26 = getelementptr inbounds i8, ptr %14, i32 4 + store i16 %25, ptr %26, align 2 + %27 = getelementptr inbounds i8, ptr %9, i32 6 + %28 = load i16, ptr %27, align 2 + %29 = getelementptr inbounds i8, ptr %11, i32 6 + %30 = load i16, ptr %29, align 2 + %31 = xor i16 %30, %28 + %32 = getelementptr inbounds i8, ptr %14, i32 6 + store i16 %31, ptr %32, align 2 + %33 = add nuw i32 %8, 1 + %34 = icmp eq i32 %33, %3 + br i1 %34, label %6, label %7 +} + +; CHECK-LABEL: four_shorts_interleave_op: +; CHECK: loop +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.or +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.xor +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.or +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.xor +; CHECK: i32.store16 +define hidden void @four_shorts_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %33, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.FourShorts, ptr %1, i32 %8 + %10 = load i16, ptr %9, align 2 + %11 = getelementptr inbounds %struct.FourShorts, ptr %2, i32 %8 + %12 = load i16, ptr %11, align 2 + %13 = or i16 %12, %10 + %14 = getelementptr inbounds %struct.FourShorts, ptr %0, i32 %8 + store i16 %13, ptr %14, align 2 + %15 = getelementptr inbounds i8, ptr %9, i32 2 + %16 = load i16, ptr %15, align 2 + %17 = getelementptr inbounds i8, ptr %11, i32 2 + %18 = load i16, ptr %17, align 2 + %19 = xor i16 %18, %16 + %20 = getelementptr inbounds i8, ptr %14, i32 2 + store i16 %19, ptr %20, align 2 + %21 = getelementptr inbounds i8, ptr %9, i32 4 + %22 = load i16, ptr %21, align 2 + %23 = getelementptr inbounds i8, ptr %11, i32 4 + %24 = load i16, ptr %23, align 2 + %25 = or i16 %24, %22 + %26 = getelementptr inbounds i8, ptr %14, i32 4 + store i16 %25, ptr %26, align 2 + %27 = getelementptr inbounds i8, ptr %9, i32 6 + %28 = load i16, ptr %27, align 2 + %29 = getelementptr inbounds i8, ptr %11, i32 6 + %30 = load i16, ptr %29, align 2 + %31 = xor i16 %30, %28 + %32 = getelementptr inbounds i8, ptr %14, i32 6 + store i16 %31, ptr %32, align 2 + %33 = add nuw i32 %8, 1 + %34 = icmp eq i32 %33, %3 + br i1 %34, label %6, label %7 +} + +; CHECK-LABEL: five_shorts: +; CHECK: loop +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.sub +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.sub +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.sub +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.sub +; CHECK: i32.store16 +; CHECK: i32.load16_u +; CHECK: i32.load16_u +; CHECK: i32.sub +; CHECK: i32.store16 +define hidden void @five_shorts(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %39, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.FiveShorts, ptr %1, i32 %8 + %10 = load i16, ptr %9, align 1 + %11 = getelementptr inbounds %struct.FiveShorts, ptr %2, i32 %8 + %12 = load i16, ptr %11, align 1 + %13 = sub i16 %10, %12 + %14 = getelementptr inbounds %struct.FiveShorts, ptr %0, i32 %8 + store i16 %13, ptr %14, align 1 + %15 = getelementptr inbounds i16, ptr %9, i32 1 + %16 = load i16, ptr %15, align 1 + %17 = getelementptr inbounds i16, ptr %11, i32 1 + %18 = load i16, ptr %17, align 1 + %19 = sub i16 %16, %18 + %20 = getelementptr inbounds i16, ptr %14, i32 1 + store i16 %19, ptr %20, align 1 + %21 = getelementptr inbounds i16, ptr %9, i32 2 + %22 = load i16, ptr %21, align 1 + %23 = getelementptr inbounds i16, ptr %11, i32 2 + %24 = load i16, ptr %23, align 1 + %25 = sub i16 %22, %24 + %26 = getelementptr inbounds i16, ptr %14, i32 2 + store i16 %25, ptr %26, align 1 + %27 = getelementptr inbounds i16, ptr %9, i32 3 + %28 = load i16, ptr %27, align 1 + %29 = getelementptr inbounds i16, ptr %11, i32 3 + %30 = load i16, ptr %29, align 1 + %31 = sub i16 %28, %30 + %32 = getelementptr inbounds i16, ptr %14, i32 3 + store i16 %31, ptr %32, align 1 + %33 = getelementptr inbounds i16, ptr %9, i32 4 + %34 = load i16, ptr %33, align 1 + %35 = getelementptr inbounds i16, ptr %11, i32 4 + %36 = load i16, ptr %35, align 1 + %37 = sub i16 %34, %36 + %38 = getelementptr inbounds i16, ptr %14, i32 4 + store i16 %37, ptr %38, align 1 + %39 = add nuw i32 %8, 1 + %40 = icmp eq i32 %39, %3 + br i1 %40, label %6, label %7 +} + +; CHECK-LABEL: two_bytes_same_op: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.store8 +define hidden void @two_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %21, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.TwoBytes, ptr %1, i32 %8 + %10 = load i8, ptr %9, align 1 + %11 = getelementptr inbounds %struct.TwoBytes, ptr %2, i32 %8 + %12 = load i8, ptr %11, align 1 + %13 = mul i8 %12, %10 + %14 = getelementptr inbounds %struct.TwoBytes, ptr %0, i32 %8 + store i8 %13, ptr %14, align 1 + %15 = getelementptr inbounds i8, ptr %9, i32 1 + %16 = load i8, ptr %15, align 1 + %17 = getelementptr inbounds i8, ptr %11, i32 1 + %18 = load i8, ptr %17, align 1 + %19 = mul i8 %18, %16 + %20 = getelementptr inbounds i8, ptr %14, i32 1 + store i8 %19, ptr %20, align 1 + %21 = add nuw i32 %8, 1 + %22 = icmp eq i32 %21, %3 + br i1 %22, label %6, label %7 +} + +; CHECK-LABEL: two_bytes_vary_op: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store8 +define hidden void @two_bytes_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %21, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.TwoBytes, ptr %1, i32 %8 + %10 = load i8, ptr %9, align 1 + %11 = getelementptr inbounds %struct.TwoBytes, ptr %2, i32 %8 + %12 = load i8, ptr %11, align 1 + %13 = mul i8 %12, %10 + %14 = getelementptr inbounds %struct.TwoBytes, ptr %0, i32 %8 + store i8 %13, ptr %14, align 1 + %15 = getelementptr inbounds i8, ptr %9, i32 1 + %16 = load i8, ptr %15, align 1 + %17 = getelementptr inbounds i8, ptr %11, i32 1 + %18 = load i8, ptr %17, align 1 + %19 = sub i8 %16, %18 + %20 = getelementptr inbounds i8, ptr %14, i32 1 + store i8 %19, ptr %20, align 1 + %21 = add nuw i32 %8, 1 + %22 = icmp eq i32 %21, %3 + br i1 %22, label %6, label %7 +} + +; CHECK-LABEL: three_bytes_same_op: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.and +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.and +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.and +; CHECK: i32.store8 +define hidden void @three_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %27, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.ThreeBytes, ptr %1, i32 %8 + %10 = load i8, ptr %9, align 1 + %11 = getelementptr inbounds %struct.ThreeBytes, ptr %2, i32 %8 + %12 = load i8, ptr %11, align 1 + %13 = and i8 %12, %10 + %14 = getelementptr inbounds %struct.ThreeBytes, ptr %0, i32 %8 + store i8 %13, ptr %14, align 1 + %15 = getelementptr inbounds i8, ptr %9, i32 1 + %16 = load i8, ptr %15, align 1 + %17 = getelementptr inbounds i8, ptr %11, i32 1 + %18 = load i8, ptr %17, align 1 + %19 = and i8 %18, %16 + %20 = getelementptr inbounds i8, ptr %14, i32 1 + store i8 %19, ptr %20, align 1 + %21 = getelementptr inbounds i8, ptr %9, i32 2 + %22 = load i8, ptr %21, align 1 + %23 = getelementptr inbounds i8, ptr %11, i32 2 + %24 = load i8, ptr %23, align 1 + %25 = and i8 %24, %22 + %26 = getelementptr inbounds i8, ptr %14, i32 2 + store i8 %25, ptr %26, align 1 + %27 = add nuw i32 %8, 1 + %28 = icmp eq i32 %27, %3 + br i1 %28, label %6, label %7 +} + +; CHECK-LABEL: three_bytes_interleave_op: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.add +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.add +; CHECK: i32.store8 +define hidden void @three_bytes_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %27, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.ThreeBytes, ptr %1, i32 %8 + %10 = load i8, ptr %9, align 1 + %11 = getelementptr inbounds %struct.ThreeBytes, ptr %2, i32 %8 + %12 = load i8, ptr %11, align 1 + %13 = add i8 %12, %10 + %14 = getelementptr inbounds %struct.ThreeBytes, ptr %0, i32 %8 + store i8 %13, ptr %14, align 1 + %15 = getelementptr inbounds i8, ptr %9, i32 1 + %16 = load i8, ptr %15, align 1 + %17 = getelementptr inbounds i8, ptr %11, i32 1 + %18 = load i8, ptr %17, align 1 + %19 = sub i8 %16, %18 + %20 = getelementptr inbounds i8, ptr %14, i32 1 + store i8 %19, ptr %20, align 1 + %21 = getelementptr inbounds i8, ptr %9, i32 2 + %22 = load i8, ptr %21, align 1 + %23 = getelementptr inbounds i8, ptr %11, i32 2 + %24 = load i8, ptr %23, align 1 + %25 = add i8 %24, %22 + %26 = getelementptr inbounds i8, ptr %14, i32 2 + store i8 %25, ptr %26, align 1 + %27 = add nuw i32 %8, 1 + %28 = icmp eq i32 %27, %3 + br i1 %28, label %6, label %7 +} + +; CHECK-LABEL: four_bytes_same_op: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.and +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.and +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.and +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.and +; CHECK: i32.store8 +define hidden void @four_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %33, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.FourBytes, ptr %1, i32 %8 + %10 = load i8, ptr %9, align 1 + %11 = getelementptr inbounds %struct.FourBytes, ptr %2, i32 %8 + %12 = load i8, ptr %11, align 1 + %13 = and i8 %12, %10 + %14 = getelementptr inbounds %struct.FourBytes, ptr %0, i32 %8 + store i8 %13, ptr %14, align 1 + %15 = getelementptr inbounds i8, ptr %9, i32 1 + %16 = load i8, ptr %15, align 1 + %17 = getelementptr inbounds i8, ptr %11, i32 1 + %18 = load i8, ptr %17, align 1 + %19 = and i8 %18, %16 + %20 = getelementptr inbounds i8, ptr %14, i32 1 + store i8 %19, ptr %20, align 1 + %21 = getelementptr inbounds i8, ptr %9, i32 2 + %22 = load i8, ptr %21, align 1 + %23 = getelementptr inbounds i8, ptr %11, i32 2 + %24 = load i8, ptr %23, align 1 + %25 = and i8 %24, %22 + %26 = getelementptr inbounds i8, ptr %14, i32 2 + store i8 %25, ptr %26, align 1 + %27 = getelementptr inbounds i8, ptr %9, i32 3 + %28 = load i8, ptr %27, align 1 + %29 = getelementptr inbounds i8, ptr %11, i32 3 + %30 = load i8, ptr %29, align 1 + %31 = and i8 %30, %28 + %32 = getelementptr inbounds i8, ptr %14, i32 3 + store i8 %31, ptr %32, align 1 + %33 = add nuw i32 %8, 1 + %34 = icmp eq i32 %33, %3 + br i1 %34, label %6, label %7 +} + +; CHECK-LABEL: four_bytes_split_op: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store8 +define hidden void @four_bytes_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %33, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.FourBytes, ptr %1, i32 %8 + %10 = load i8, ptr %9, align 1 + %11 = getelementptr inbounds %struct.FourBytes, ptr %2, i32 %8 + %12 = load i8, ptr %11, align 1 + %13 = mul i8 %12, %10 + %14 = getelementptr inbounds %struct.FourBytes, ptr %0, i32 %8 + store i8 %13, ptr %14, align 1 + %15 = getelementptr inbounds i8, ptr %9, i32 1 + %16 = load i8, ptr %15, align 1 + %17 = getelementptr inbounds i8, ptr %11, i32 1 + %18 = load i8, ptr %17, align 1 + %19 = mul i8 %18, %16 + %20 = getelementptr inbounds i8, ptr %14, i32 1 + store i8 %19, ptr %20, align 1 + %21 = getelementptr inbounds i8, ptr %9, i32 2 + %22 = load i8, ptr %21, align 1 + %23 = getelementptr inbounds i8, ptr %11, i32 2 + %24 = load i8, ptr %23, align 1 + %25 = sub i8 %22, %24 + %26 = getelementptr inbounds i8, ptr %14, i32 2 + store i8 %25, ptr %26, align 1 + %27 = getelementptr inbounds i8, ptr %9, i32 3 + %28 = load i8, ptr %27, align 1 + %29 = getelementptr inbounds i8, ptr %11, i32 3 + %30 = load i8, ptr %29, align 1 + %31 = sub i8 %28, %30 + %32 = getelementptr inbounds i8, ptr %14, i32 3 + store i8 %31, ptr %32, align 1 + %33 = add nuw i32 %8, 1 + %34 = icmp eq i32 %33, %3 + br i1 %34, label %6, label %7 +} + +; CHECK-LABEL: four_bytes_interleave_op: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.add +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.add +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store8 +define hidden void @four_bytes_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %33, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.FourBytes, ptr %1, i32 %8 + %10 = load i8, ptr %9, align 1 + %11 = getelementptr inbounds %struct.FourBytes, ptr %2, i32 %8 + %12 = load i8, ptr %11, align 1 + %13 = add i8 %12, %10 + %14 = getelementptr inbounds %struct.FourBytes, ptr %0, i32 %8 + store i8 %13, ptr %14, align 1 + %15 = getelementptr inbounds i8, ptr %9, i32 1 + %16 = load i8, ptr %15, align 1 + %17 = getelementptr inbounds i8, ptr %11, i32 1 + %18 = load i8, ptr %17, align 1 + %19 = sub i8 %16, %18 + %20 = getelementptr inbounds i8, ptr %14, i32 1 + store i8 %19, ptr %20, align 1 + %21 = getelementptr inbounds i8, ptr %9, i32 2 + %22 = load i8, ptr %21, align 1 + %23 = getelementptr inbounds i8, ptr %11, i32 2 + %24 = load i8, ptr %23, align 1 + %25 = add i8 %24, %22 + %26 = getelementptr inbounds i8, ptr %14, i32 2 + store i8 %25, ptr %26, align 1 + %27 = getelementptr inbounds i8, ptr %9, i32 3 + %28 = load i8, ptr %27, align 1 + %29 = getelementptr inbounds i8, ptr %11, i32 3 + %30 = load i8, ptr %29, align 1 + %31 = sub i8 %28, %30 + %32 = getelementptr inbounds i8, ptr %14, i32 3 + store i8 %31, ptr %32, align 1 + %33 = add nuw i32 %8, 1 + %34 = icmp eq i32 %33, %3 + br i1 %34, label %6, label %7 +} + +; CHECK-LABEL: eight_bytes_same_op: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.store8 +define hidden void @eight_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %57, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.EightBytes, ptr %1, i32 %8 + %10 = load i8, ptr %9, align 1 + %11 = getelementptr inbounds %struct.EightBytes, ptr %2, i32 %8 + %12 = load i8, ptr %11, align 1 + %13 = mul i8 %12, %10 + %14 = getelementptr inbounds %struct.EightBytes, ptr %0, i32 %8 + store i8 %13, ptr %14, align 1 + %15 = getelementptr inbounds i8, ptr %9, i32 1 + %16 = load i8, ptr %15, align 1 + %17 = getelementptr inbounds i8, ptr %11, i32 1 + %18 = load i8, ptr %17, align 1 + %19 = mul i8 %18, %16 + %20 = getelementptr inbounds i8, ptr %14, i32 1 + store i8 %19, ptr %20, align 1 + %21 = getelementptr inbounds i8, ptr %9, i32 2 + %22 = load i8, ptr %21, align 1 + %23 = getelementptr inbounds i8, ptr %11, i32 2 + %24 = load i8, ptr %23, align 1 + %25 = mul i8 %24, %22 + %26 = getelementptr inbounds i8, ptr %14, i32 2 + store i8 %25, ptr %26, align 1 + %27 = getelementptr inbounds i8, ptr %9, i32 3 + %28 = load i8, ptr %27, align 1 + %29 = getelementptr inbounds i8, ptr %11, i32 3 + %30 = load i8, ptr %29, align 1 + %31 = mul i8 %30, %28 + %32 = getelementptr inbounds i8, ptr %14, i32 3 + store i8 %31, ptr %32, align 1 + %33 = getelementptr inbounds i8, ptr %9, i32 4 + %34 = load i8, ptr %33, align 1 + %35 = getelementptr inbounds i8, ptr %11, i32 4 + %36 = load i8, ptr %35, align 1 + %37 = mul i8 %36, %34 + %38 = getelementptr inbounds i8, ptr %14, i32 4 + store i8 %37, ptr %38, align 1 + %39 = getelementptr inbounds i8, ptr %9, i32 5 + %40 = load i8, ptr %39, align 1 + %41 = getelementptr inbounds i8, ptr %11, i32 5 + %42 = load i8, ptr %41, align 1 + %43 = mul i8 %42, %40 + %44 = getelementptr inbounds i8, ptr %14, i32 5 + store i8 %43, ptr %44, align 1 + %45 = getelementptr inbounds i8, ptr %9, i32 6 + %46 = load i8, ptr %45, align 1 + %47 = getelementptr inbounds i8, ptr %11, i32 6 + %48 = load i8, ptr %47, align 1 + %49 = mul i8 %48, %46 + %50 = getelementptr inbounds i8, ptr %14, i32 6 + store i8 %49, ptr %50, align 1 + %51 = getelementptr inbounds i8, ptr %9, i32 7 + %52 = load i8, ptr %51, align 1 + %53 = getelementptr inbounds i8, ptr %11, i32 7 + %54 = load i8, ptr %53, align 1 + %55 = mul i8 %54, %52 + %56 = getelementptr inbounds i8, ptr %14, i32 7 + store i8 %55, ptr %56, align 1 + %57 = add nuw i32 %8, 1 + %58 = icmp eq i32 %57, %3 + br i1 %58, label %6, label %7 +} + +; CHECK-LABEL: eight_bytes_split_op: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.add +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.add +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.add +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.add +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store8 +define hidden void @eight_bytes_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %57, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.EightBytes, ptr %1, i32 %8 + %10 = load i8, ptr %9, align 1 + %11 = getelementptr inbounds %struct.EightBytes, ptr %2, i32 %8 + %12 = load i8, ptr %11, align 1 + %13 = add i8 %12, %10 + %14 = getelementptr inbounds %struct.EightBytes, ptr %0, i32 %8 + store i8 %13, ptr %14, align 1 + %15 = getelementptr inbounds i8, ptr %9, i32 1 + %16 = load i8, ptr %15, align 1 + %17 = getelementptr inbounds i8, ptr %11, i32 1 + %18 = load i8, ptr %17, align 1 + %19 = add i8 %18, %16 + %20 = getelementptr inbounds i8, ptr %14, i32 1 + store i8 %19, ptr %20, align 1 + %21 = getelementptr inbounds i8, ptr %9, i32 2 + %22 = load i8, ptr %21, align 1 + %23 = getelementptr inbounds i8, ptr %11, i32 2 + %24 = load i8, ptr %23, align 1 + %25 = add i8 %24, %22 + %26 = getelementptr inbounds i8, ptr %14, i32 2 + store i8 %25, ptr %26, align 1 + %27 = getelementptr inbounds i8, ptr %9, i32 3 + %28 = load i8, ptr %27, align 1 + %29 = getelementptr inbounds i8, ptr %11, i32 3 + %30 = load i8, ptr %29, align 1 + %31 = add i8 %30, %28 + %32 = getelementptr inbounds i8, ptr %14, i32 3 + store i8 %31, ptr %32, align 1 + %33 = getelementptr inbounds i8, ptr %9, i32 4 + %34 = load i8, ptr %33, align 1 + %35 = getelementptr inbounds i8, ptr %11, i32 4 + %36 = load i8, ptr %35, align 1 + %37 = sub i8 %34, %36 + %38 = getelementptr inbounds i8, ptr %14, i32 4 + store i8 %37, ptr %38, align 1 + %39 = getelementptr inbounds i8, ptr %9, i32 5 + %40 = load i8, ptr %39, align 1 + %41 = getelementptr inbounds i8, ptr %11, i32 5 + %42 = load i8, ptr %41, align 1 + %43 = sub i8 %40, %42 + %44 = getelementptr inbounds i8, ptr %14, i32 5 + store i8 %43, ptr %44, align 1 + %45 = getelementptr inbounds i8, ptr %9, i32 6 + %46 = load i8, ptr %45, align 1 + %47 = getelementptr inbounds i8, ptr %11, i32 6 + %48 = load i8, ptr %47, align 1 + %49 = sub i8 %46, %48 + %50 = getelementptr inbounds i8, ptr %14, i32 6 + store i8 %49, ptr %50, align 1 + %51 = getelementptr inbounds i8, ptr %9, i32 7 + %52 = load i8, ptr %51, align 1 + %53 = getelementptr inbounds i8, ptr %11, i32 7 + %54 = load i8, ptr %53, align 1 + %55 = sub i8 %52, %54 + %56 = getelementptr inbounds i8, ptr %14, i32 7 + store i8 %55, ptr %56, align 1 + %57 = add nuw i32 %8, 1 + %58 = icmp eq i32 %57, %3 + br i1 %58, label %6, label %7 +} + +; CHECK-LABEL: eight_bytes_interleave_op: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.add +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.add +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.add +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.add +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store8 +define hidden void @eight_bytes_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %57, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.EightBytes, ptr %1, i32 %8 + %10 = load i8, ptr %9, align 1 + %11 = getelementptr inbounds %struct.EightBytes, ptr %2, i32 %8 + %12 = load i8, ptr %11, align 1 + %13 = add i8 %12, %10 + %14 = getelementptr inbounds %struct.EightBytes, ptr %0, i32 %8 + store i8 %13, ptr %14, align 1 + %15 = getelementptr inbounds i8, ptr %9, i32 1 + %16 = load i8, ptr %15, align 1 + %17 = getelementptr inbounds i8, ptr %11, i32 1 + %18 = load i8, ptr %17, align 1 + %19 = sub i8 %16, %18 + %20 = getelementptr inbounds i8, ptr %14, i32 1 + store i8 %19, ptr %20, align 1 + %21 = getelementptr inbounds i8, ptr %9, i32 2 + %22 = load i8, ptr %21, align 1 + %23 = getelementptr inbounds i8, ptr %11, i32 2 + %24 = load i8, ptr %23, align 1 + %25 = add i8 %24, %22 + %26 = getelementptr inbounds i8, ptr %14, i32 2 + store i8 %25, ptr %26, align 1 + %27 = getelementptr inbounds i8, ptr %9, i32 3 + %28 = load i8, ptr %27, align 1 + %29 = getelementptr inbounds i8, ptr %11, i32 3 + %30 = load i8, ptr %29, align 1 + %31 = sub i8 %28, %30 + %32 = getelementptr inbounds i8, ptr %14, i32 3 + store i8 %31, ptr %32, align 1 + %33 = getelementptr inbounds i8, ptr %9, i32 4 + %34 = load i8, ptr %33, align 1 + %35 = getelementptr inbounds i8, ptr %11, i32 4 + %36 = load i8, ptr %35, align 1 + %37 = add i8 %36, %34 + %38 = getelementptr inbounds i8, ptr %14, i32 4 + store i8 %37, ptr %38, align 1 + %39 = getelementptr inbounds i8, ptr %9, i32 5 + %40 = load i8, ptr %39, align 1 + %41 = getelementptr inbounds i8, ptr %11, i32 5 + %42 = load i8, ptr %41, align 1 + %43 = sub i8 %40, %42 + %44 = getelementptr inbounds i8, ptr %14, i32 5 + store i8 %43, ptr %44, align 1 + %45 = getelementptr inbounds i8, ptr %9, i32 6 + %46 = load i8, ptr %45, align 1 + %47 = getelementptr inbounds i8, ptr %11, i32 6 + %48 = load i8, ptr %47, align 1 + %49 = add i8 %48, %46 + %50 = getelementptr inbounds i8, ptr %14, i32 6 + store i8 %49, ptr %50, align 1 + %51 = getelementptr inbounds i8, ptr %9, i32 7 + %52 = load i8, ptr %51, align 1 + %53 = getelementptr inbounds i8, ptr %11, i32 7 + %54 = load i8, ptr %53, align 1 + %55 = sub i8 %52, %54 + %56 = getelementptr inbounds i8, ptr %14, i32 7 + store i8 %55, ptr %56, align 1 + %57 = add nuw i32 %8, 1 + %58 = icmp eq i32 %57, %3 + br i1 %58, label %6, label %7 +} + +; CHECK-LABEL: four_bytes_into_four_ints_same_op: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.load +; CHECK: i32.add +; CHECK: i32.store +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.load +; CHECK: i32.add +; CHECK: i32.store +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.load +; CHECK: i32.add +; CHECK: i32.store +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.load +; CHECK: i32.add +; CHECK: i32.store +define hidden void @four_bytes_into_four_ints_same_op(ptr noalias nocapture noundef %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %49, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.FourBytes, ptr %1, i32 %8 + %10 = load i8, ptr %9, align 1 + %11 = zext i8 %10 to i32 + %12 = getelementptr inbounds %struct.FourBytes, ptr %2, i32 %8 + %13 = load i8, ptr %12, align 1 + %14 = zext i8 %13 to i32 + %15 = mul nuw nsw i32 %14, %11 + %16 = getelementptr inbounds %struct.FourInts, ptr %0, i32 %8 + %17 = load i32, ptr %16, align 4 + %18 = add nsw i32 %15, %17 + store i32 %18, ptr %16, align 4 + %19 = getelementptr inbounds i8, ptr %9, i32 1 + %20 = load i8, ptr %19, align 1 + %21 = zext i8 %20 to i32 + %22 = getelementptr inbounds i8, ptr %12, i32 1 + %23 = load i8, ptr %22, align 1 + %24 = zext i8 %23 to i32 + %25 = mul nuw nsw i32 %24, %21 + %26 = getelementptr inbounds i8, ptr %16, i32 4 + %27 = load i32, ptr %26, align 4 + %28 = add nsw i32 %25, %27 + store i32 %28, ptr %26, align 4 + %29 = getelementptr inbounds i8, ptr %9, i32 2 + %30 = load i8, ptr %29, align 1 + %31 = zext i8 %30 to i32 + %32 = getelementptr inbounds i8, ptr %12, i32 2 + %33 = load i8, ptr %32, align 1 + %34 = zext i8 %33 to i32 + %35 = mul nuw nsw i32 %34, %31 + %36 = getelementptr inbounds i8, ptr %16, i32 8 + %37 = load i32, ptr %36, align 4 + %38 = add nsw i32 %35, %37 + store i32 %38, ptr %36, align 4 + %39 = getelementptr inbounds i8, ptr %9, i32 3 + %40 = load i8, ptr %39, align 1 + %41 = zext i8 %40 to i32 + %42 = getelementptr inbounds i8, ptr %12, i32 3 + %43 = load i8, ptr %42, align 1 + %44 = zext i8 %43 to i32 + %45 = mul nuw nsw i32 %44, %41 + %46 = getelementptr inbounds i8, ptr %16, i32 12 + %47 = load i32, ptr %46, align 4 + %48 = add nsw i32 %45, %47 + store i32 %48, ptr %46, align 4 + %49 = add nuw i32 %8, 1 + %50 = icmp eq i32 %49, %3 + br i1 %50, label %6, label %7 +} + +; CHECK-LABEL: four_bytes_into_four_ints_vary_op: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.add +; CHECK: i32.store +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.sub +; CHECK: i32.store +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.mul +; CHECK: i32.store +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.and +; CHECK: i32.store +define hidden void @four_bytes_into_four_ints_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { + %5 = icmp eq i32 %3, 0 + br i1 %5, label %6, label %7 + +6: ; preds = %7, %4 + ret void + +7: ; preds = %4, %7 + %8 = phi i32 [ %40, %7 ], [ 0, %4 ] + %9 = getelementptr inbounds %struct.FourBytes, ptr %1, i32 %8 + %10 = load i8, ptr %9, align 1 + %11 = zext i8 %10 to i32 + %12 = getelementptr inbounds %struct.FourBytes, ptr %2, i32 %8 + %13 = load i8, ptr %12, align 1 + %14 = zext i8 %13 to i32 + %15 = add nuw nsw i32 %14, %11 + %16 = getelementptr inbounds %struct.FourInts, ptr %0, i32 %8 + store i32 %15, ptr %16, align 4 + %17 = getelementptr inbounds i8, ptr %9, i32 1 + %18 = load i8, ptr %17, align 1 + %19 = zext i8 %18 to i32 + %20 = getelementptr inbounds i8, ptr %12, i32 1 + %21 = load i8, ptr %20, align 1 + %22 = zext i8 %21 to i32 + %23 = sub nsw i32 %19, %22 + %24 = getelementptr inbounds i8, ptr %16, i32 4 + store i32 %23, ptr %24, align 4 + %25 = getelementptr inbounds i8, ptr %9, i32 2 + %26 = load i8, ptr %25, align 1 + %27 = zext i8 %26 to i32 + %28 = getelementptr inbounds i8, ptr %12, i32 2 + %29 = load i8, ptr %28, align 1 + %30 = zext i8 %29 to i32 + %31 = mul nuw nsw i32 %30, %27 + %32 = getelementptr inbounds i8, ptr %16, i32 8 + store i32 %31, ptr %32, align 4 + %33 = getelementptr inbounds i8, ptr %9, i32 3 + %34 = load i8, ptr %33, align 1 + %35 = getelementptr inbounds i8, ptr %12, i32 3 + %36 = load i8, ptr %35, align 1 + %37 = and i8 %36, %34 + %38 = zext i8 %37 to i32 + %39 = getelementptr inbounds i8, ptr %16, i32 12 + store i32 %38, ptr %39, align 4 + %40 = add nuw i32 %8, 1 + %41 = icmp eq i32 %40, %3 + br i1 %41, label %6, label %7 +} + +; CHECK-LABEL: scale_uv_row_down2: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.store8 +define hidden void @scale_uv_row_down2(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) { + %5 = icmp sgt i32 %3, 0 + br i1 %5, label %6, label %19 + +6: ; preds = %4, %6 + %7 = phi i32 [ %17, %6 ], [ 0, %4 ] + %8 = phi ptr [ %15, %6 ], [ %0, %4 ] + %9 = phi ptr [ %16, %6 ], [ %2, %4 ] + %10 = getelementptr inbounds i8, ptr %8, i32 2 + %11 = load i8, ptr %10, align 1 + store i8 %11, ptr %9, align 1 + %12 = getelementptr inbounds i8, ptr %8, i32 3 + %13 = load i8, ptr %12, align 1 + %14 = getelementptr inbounds i8, ptr %9, i32 1 + store i8 %13, ptr %14, align 1 + %15 = getelementptr inbounds i8, ptr %8, i32 4 + %16 = getelementptr inbounds i8, ptr %9, i32 2 + %17 = add nuw nsw i32 %7, 1 + %18 = icmp eq i32 %17, %3 + br i1 %18, label %19, label %6 + +19: ; preds = %6, %4 + ret void +} + +; CHECK-LABEL: scale_uv_row_down2_box: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.shr_u +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.shr_u +; CHECK: i32.store8 +define hidden void @scale_uv_row_down2_box(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) { + %5 = icmp sgt i32 %3, 0 + br i1 %5, label %6, label %54 + +6: ; preds = %4 + %7 = add nsw i32 %1, 2 + %8 = add nsw i32 %1, 1 + %9 = add nsw i32 %1, 3 + br label %10 + +10: ; preds = %6, %10 + %11 = phi i32 [ 0, %6 ], [ %52, %10 ] + %12 = phi ptr [ %0, %6 ], [ %50, %10 ] + %13 = phi ptr [ %2, %6 ], [ %51, %10 ] + %14 = load i8, ptr %12, align 1 + %15 = zext i8 %14 to i16 + %16 = getelementptr inbounds i8, ptr %12, i32 2 + %17 = load i8, ptr %16, align 1 + %18 = zext i8 %17 to i16 + %19 = getelementptr inbounds i8, ptr %12, i32 %1 + %20 = load i8, ptr %19, align 1 + %21 = zext i8 %20 to i16 + %22 = getelementptr inbounds i8, ptr %12, i32 %7 + %23 = load i8, ptr %22, align 1 + %24 = zext i8 %23 to i16 + %25 = add nuw nsw i16 %15, 2 + %26 = add nuw nsw i16 %25, %18 + %27 = add nuw nsw i16 %26, %21 + %28 = add nuw nsw i16 %27, %24 + %29 = lshr i16 %28, 2 + %30 = trunc nuw i16 %29 to i8 + store i8 %30, ptr %13, align 1 + %31 = getelementptr inbounds i8, ptr %12, i32 1 + %32 = load i8, ptr %31, align 1 + %33 = zext i8 %32 to i16 + %34 = getelementptr inbounds i8, ptr %12, i32 3 + %35 = load i8, ptr %34, align 1 + %36 = zext i8 %35 to i16 + %37 = getelementptr inbounds i8, ptr %12, i32 %8 + %38 = load i8, ptr %37, align 1 + %39 = zext i8 %38 to i16 + %40 = getelementptr inbounds i8, ptr %12, i32 %9 + %41 = load i8, ptr %40, align 1 + %42 = zext i8 %41 to i16 + %43 = add nuw nsw i16 %33, 2 + %44 = add nuw nsw i16 %43, %36 + %45 = add nuw nsw i16 %44, %39 + %46 = add nuw nsw i16 %45, %42 + %47 = lshr i16 %46, 2 + %48 = trunc nuw i16 %47 to i8 + %49 = getelementptr inbounds i8, ptr %13, i32 1 + store i8 %48, ptr %49, align 1 + %50 = getelementptr inbounds i8, ptr %12, i32 4 + %51 = getelementptr inbounds i8, ptr %13, i32 2 + %52 = add nuw nsw i32 %11, 1 + %53 = icmp eq i32 %52, %3 + br i1 %53, label %54, label %10 + +54: ; preds = %10, %4 + ret void +} + +; CHECK-LABEL: scale_uv_row_down2_linear: +; CHECK: loop +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.shr_u +; CHECK: i32.store8 +; CHECK: i32.load8_u +; CHECK: i32.load8_u +; CHECK: i32.shr_u +; CHECK: i32.store8 +define hidden void @scale_uv_row_down2_linear(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) { + %5 = icmp sgt i32 %3, 0 + br i1 %5, label %6, label %34 + +6: ; preds = %4, %6 + %7 = phi i32 [ %32, %6 ], [ 0, %4 ] + %8 = phi ptr [ %30, %6 ], [ %0, %4 ] + %9 = phi ptr [ %31, %6 ], [ %2, %4 ] + %10 = load i8, ptr %8, align 1 + %11 = zext i8 %10 to i16 + %12 = getelementptr inbounds i8, ptr %8, i32 2 + %13 = load i8, ptr %12, align 1 + %14 = zext i8 %13 to i16 + %15 = add nuw nsw i16 %11, 1 + %16 = add nuw nsw i16 %15, %14 + %17 = lshr i16 %16, 1 + %18 = trunc nuw i16 %17 to i8 + store i8 %18, ptr %9, align 1 + %19 = getelementptr inbounds i8, ptr %8, i32 1 + %20 = load i8, ptr %19, align 1 + %21 = zext i8 %20 to i16 + %22 = getelementptr inbounds i8, ptr %8, i32 3 + %23 = load i8, ptr %22, align 1 + %24 = zext i8 %23 to i16 + %25 = add nuw nsw i16 %21, 1 + %26 = add nuw nsw i16 %25, %24 + %27 = lshr i16 %26, 1 + %28 = trunc nuw i16 %27 to i8 + %29 = getelementptr inbounds i8, ptr %9, i32 1 + store i8 %28, ptr %29, align 1 + %30 = getelementptr inbounds i8, ptr %8, i32 4 + %31 = getelementptr inbounds i8, ptr %9, i32 2 + %32 = add nuw nsw i32 %7, 1 + %33 = icmp eq i32 %32, %3 + br i1 %33, label %34, label %6 + +34: ; preds = %6, %4 + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/narrow-simd-mul.ll b/llvm/test/CodeGen/WebAssembly/narrow-simd-mul.ll new file mode 100644 index 0000000..1f6c960 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/narrow-simd-mul.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=wasm32 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s + +define <8 x i8> @mul_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: mul_v8i8: +; CHECK: .functype mul_v8i8 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i8x16.extract_lane_u $push4=, $0, 0 +; CHECK-NEXT: i8x16.extract_lane_u $push3=, $1, 0 +; CHECK-NEXT: i32.mul $push5=, $pop4, $pop3 +; CHECK-NEXT: i8x16.splat $push6=, $pop5 +; CHECK-NEXT: i8x16.extract_lane_u $push1=, $0, 1 +; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 1 +; CHECK-NEXT: i32.mul $push2=, $pop1, $pop0 +; CHECK-NEXT: i8x16.replace_lane $push7=, $pop6, 1, $pop2 +; CHECK-NEXT: i8x16.extract_lane_u $push9=, $0, 2 +; CHECK-NEXT: i8x16.extract_lane_u $push8=, $1, 2 +; CHECK-NEXT: i32.mul $push10=, $pop9, $pop8 +; CHECK-NEXT: i8x16.replace_lane $push11=, $pop7, 2, $pop10 +; CHECK-NEXT: i8x16.extract_lane_u $push13=, $0, 3 +; CHECK-NEXT: i8x16.extract_lane_u $push12=, $1, 3 +; CHECK-NEXT: i32.mul $push14=, $pop13, $pop12 +; CHECK-NEXT: i8x16.replace_lane $push15=, $pop11, 3, $pop14 +; CHECK-NEXT: i8x16.extract_lane_u $push17=, $0, 4 +; CHECK-NEXT: i8x16.extract_lane_u $push16=, $1, 4 +; CHECK-NEXT: i32.mul $push18=, $pop17, $pop16 +; CHECK-NEXT: i8x16.replace_lane $push19=, $pop15, 4, $pop18 +; CHECK-NEXT: i8x16.extract_lane_u $push21=, $0, 5 +; CHECK-NEXT: i8x16.extract_lane_u $push20=, $1, 5 +; CHECK-NEXT: i32.mul $push22=, $pop21, $pop20 +; CHECK-NEXT: i8x16.replace_lane $push23=, $pop19, 5, $pop22 +; CHECK-NEXT: i8x16.extract_lane_u $push25=, $0, 6 +; CHECK-NEXT: i8x16.extract_lane_u $push24=, $1, 6 +; CHECK-NEXT: i32.mul $push26=, $pop25, $pop24 +; CHECK-NEXT: i8x16.replace_lane $push27=, $pop23, 6, $pop26 +; CHECK-NEXT: i8x16.extract_lane_u $push29=, $0, 7 +; CHECK-NEXT: i8x16.extract_lane_u $push28=, $1, 7 +; CHECK-NEXT: i32.mul $push30=, $pop29, $pop28 +; CHECK-NEXT: i8x16.replace_lane $push31=, $pop27, 7, $pop30 +; CHECK-NEXT: i8x16.extract_lane_u $push33=, $0, 8 +; CHECK-NEXT: i8x16.extract_lane_u $push32=, $1, 8 +; CHECK-NEXT: i32.mul $push34=, $pop33, $pop32 +; CHECK-NEXT: i8x16.replace_lane $push35=, $pop31, 8, $pop34 +; CHECK-NEXT: i8x16.extract_lane_u $push37=, $0, 9 +; CHECK-NEXT: i8x16.extract_lane_u $push36=, $1, 9 +; CHECK-NEXT: i32.mul $push38=, $pop37, $pop36 +; CHECK-NEXT: i8x16.replace_lane $push39=, $pop35, 9, $pop38 +; CHECK-NEXT: i8x16.extract_lane_u $push41=, $0, 10 +; CHECK-NEXT: i8x16.extract_lane_u $push40=, $1, 10 +; CHECK-NEXT: i32.mul $push42=, $pop41, $pop40 +; CHECK-NEXT: i8x16.replace_lane $push43=, $pop39, 10, $pop42 +; CHECK-NEXT: i8x16.extract_lane_u $push45=, $0, 11 +; CHECK-NEXT: i8x16.extract_lane_u $push44=, $1, 11 +; CHECK-NEXT: i32.mul $push46=, $pop45, $pop44 +; CHECK-NEXT: i8x16.replace_lane $push47=, $pop43, 11, $pop46 +; CHECK-NEXT: i8x16.extract_lane_u $push49=, $0, 12 +; CHECK-NEXT: i8x16.extract_lane_u $push48=, $1, 12 +; CHECK-NEXT: i32.mul $push50=, $pop49, $pop48 +; CHECK-NEXT: i8x16.replace_lane $push51=, $pop47, 12, $pop50 +; CHECK-NEXT: i8x16.extract_lane_u $push53=, $0, 13 +; CHECK-NEXT: i8x16.extract_lane_u $push52=, $1, 13 +; CHECK-NEXT: i32.mul $push54=, $pop53, $pop52 +; CHECK-NEXT: i8x16.replace_lane $push55=, $pop51, 13, $pop54 +; CHECK-NEXT: i8x16.extract_lane_u $push57=, $0, 14 +; CHECK-NEXT: i8x16.extract_lane_u $push56=, $1, 14 +; CHECK-NEXT: i32.mul $push58=, $pop57, $pop56 +; CHECK-NEXT: i8x16.replace_lane $push59=, $pop55, 14, $pop58 +; CHECK-NEXT: i8x16.extract_lane_u $push61=, $0, 15 +; CHECK-NEXT: i8x16.extract_lane_u $push60=, $1, 15 +; CHECK-NEXT: i32.mul $push62=, $pop61, $pop60 +; CHECK-NEXT: i8x16.replace_lane $push63=, $pop59, 15, $pop62 +; CHECK-NEXT: return $pop63 + %mul = mul <8 x i8> %a, %b + ret <8 x i8> %mul +} + +define <4 x i16> @mul_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: mul_v4i16: +; CHECK: .functype mul_v4i16 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i16x8.mul $push0=, $0, $1 +; CHECK-NEXT: return $pop0 + %mul = mul <4 x i16> %a, %b + ret <4 x i16> %mul +} + +define <2 x i32> @mul_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: mul_v2i32: +; CHECK: .functype mul_v2i32 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32x4.mul $push0=, $0, $1 +; CHECK-NEXT: return $pop0 + %mul = mul <2 x i32> %a, %b + ret <2 x i32> %mul +} diff --git a/llvm/test/CodeGen/WebAssembly/ref-test-func.ll b/llvm/test/CodeGen/WebAssembly/ref-test-func.ll new file mode 100644 index 0000000..ea2453f --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/ref-test-func.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -mcpu=mvp -mattr=+reference-types -mattr=+gc -verify-machineinstrs | FileCheck --check-prefixes CHECK,CHK32 %s +; RUN: llc < %s --mtriple=wasm64-unknown-unknown -mcpu=mvp -mattr=+reference-types -mattr=+gc -verify-machineinstrs | FileCheck --check-prefixes CHECK,CHK64 %s + +define void @test_fpsig_void_void(ptr noundef %func) local_unnamed_addr #0 { +; CHECK-LABEL: test_fpsig_void_void: +; CHK32: .functype test_fpsig_void_void (i32) -> () +; CHK64: .functype test_fpsig_void_void (i64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 0 +; CHK64-NEXT: i32.wrap_i64 +; CHECK-NEXT: table.get __indirect_function_table +; CHECK-NEXT: ref.test () -> () +; CHECK-NEXT: call use +; CHECK-NEXT: # fallthrough-return +entry: + %res = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func) + tail call void @use(i32 noundef %res) #3 + ret void +} + +define void @test_fpsig_return_i32(ptr noundef %func) local_unnamed_addr #0 { +; CHECK-LABEL: test_fpsig_return_i32: +; CHK32: .functype test_fpsig_return_i32 (i32) -> () +; CHK64: .functype test_fpsig_return_i32 (i64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 0 +; CHK64-NEXT: i32.wrap_i64 +; CHECK-NEXT: table.get __indirect_function_table +; CHECK-NEXT: ref.test () -> (i32) +; CHECK-NEXT: call use +; CHECK-NEXT: # fallthrough-return +entry: + %res = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, i32 0) + tail call void @use(i32 noundef %res) #3 + ret void +} + +define void @test_fpsig_return_i64(ptr noundef %func) local_unnamed_addr #0 { +; CHECK-LABEL: test_fpsig_return_i64: +; CHK32: .functype test_fpsig_return_i64 (i32) -> () +; CHK64: .functype test_fpsig_return_i64 (i64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 0 +; CHK64-NEXT: i32.wrap_i64 +; CHECK-NEXT: table.get __indirect_function_table +; CHECK-NEXT: ref.test () -> (i64) +; CHECK-NEXT: call use +; CHECK-NEXT: # fallthrough-return +entry: + %res = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, i64 0) + tail call void @use(i32 noundef %res) #3 + ret void +} + +define void @test_fpsig_return_f32(ptr noundef %func) local_unnamed_addr #0 { +; CHECK-LABEL: test_fpsig_return_f32: +; CHK32: .functype test_fpsig_return_f32 (i32) -> () +; CHK64: .functype test_fpsig_return_f32 (i64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 0 +; CHK64-NEXT: i32.wrap_i64 +; CHECK-NEXT: table.get __indirect_function_table +; CHECK-NEXT: ref.test () -> (f32) +; CHECK-NEXT: call use +; CHECK-NEXT: # fallthrough-return +entry: + %res = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, float 0.) + tail call void @use(i32 noundef %res) #3 + ret void +} + +define void @test_fpsig_return_f64(ptr noundef %func) local_unnamed_addr #0 { +; CHECK-LABEL: test_fpsig_return_f64: +; CHK32: .functype test_fpsig_return_f64 (i32) -> () +; CHK64: .functype test_fpsig_return_f64 (i64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 0 +; CHK64-NEXT: i32.wrap_i64 +; CHECK-NEXT: table.get __indirect_function_table +; CHECK-NEXT: ref.test () -> (f64) +; CHECK-NEXT: call use +; CHECK-NEXT: # fallthrough-return +entry: + %res = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, double 0.) + tail call void @use(i32 noundef %res) #3 + ret void +} + + +define void @test_fpsig_param_i32(ptr noundef %func) local_unnamed_addr #0 { +; CHECK-LABEL: test_fpsig_param_i32: +; CHK32: .functype test_fpsig_param_i32 (i32) -> () +; CHK64: .functype test_fpsig_param_i32 (i64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 0 +; CHK64-NEXT: i32.wrap_i64 +; CHECK-NEXT: table.get __indirect_function_table +; CHECK-NEXT: ref.test (f64) -> () +; CHECK-NEXT: call use +; CHECK-NEXT: # fallthrough-return +entry: + %res = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, token poison, double 0.) + tail call void @use(i32 noundef %res) #3 + ret void +} + + +define void @test_fpsig_multiple_params_and_returns(ptr noundef %func) local_unnamed_addr #0 { +; CHECK-LABEL: test_fpsig_multiple_params_and_returns: +; CHK32: .functype test_fpsig_multiple_params_and_returns (i32) -> () +; CHK64: .functype test_fpsig_multiple_params_and_returns (i64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 0 +; CHK64-NEXT: i32.wrap_i64 +; CHECK-NEXT: table.get __indirect_function_table +; CHECK-NEXT: ref.test (i64, f32, i64) -> (i32, i64, f32, f64) +; CHECK-NEXT: call use +; CHECK-NEXT: # fallthrough-return +entry: + %res = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, i32 0, i64 0, float 0., double 0., token poison, i64 0, float 0., i64 0) + tail call void @use(i32 noundef %res) #3 + ret void +} + + +define void @test_fpsig_ptrs(ptr noundef %func) local_unnamed_addr #0 { +; CHECK-LABEL: test_fpsig_ptrs: +; CHK32: .functype test_fpsig_ptrs (i32) -> () +; CHK64: .functype test_fpsig_ptrs (i64) -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 0 +; CHK64-NEXT: i32.wrap_i64 +; CHECK-NEXT: table.get __indirect_function_table +; CHK32-NEXT: ref.test (i32, i32) -> (i32) +; CHK64-NEXT: ref.test (i64, i64) -> (i64) +; CHECK-NEXT: call use +; CHECK-NEXT: # fallthrough-return +entry: + %res = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, ptr null, token poison, ptr null, ptr null) + tail call void @use(i32 noundef %res) #3 + ret void +} + + +declare void @use(i32 noundef) local_unnamed_addr #1 diff --git a/llvm/test/CodeGen/WebAssembly/removed-terminator.ll b/llvm/test/CodeGen/WebAssembly/removed-terminator.ll new file mode 100644 index 0000000..188f6f6 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/removed-terminator.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -O0 -verify-machineinstrs < %s | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +define void @test(i1 %x) { +; CHECK-LABEL: test: +; CHECK: .functype test (i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const -1 +; CHECK-NEXT: i32.xor +; CHECK-NEXT: i32.const 1 +; CHECK-NEXT: i32.and +; CHECK-NEXT: drop +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: return + %y = xor i1 %x, true + ; This br_if's operand (%y) is stackified in RegStackify. But this terminator + ; will be removed in CFGSort after that. We need to make sure we unstackify %y + ; so that it can be dropped in ExplicitLocals. + br i1 %y, label %exit, label %exit + +exit: + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/returned.ll b/llvm/test/CodeGen/WebAssembly/returned.ll index e767e29..aef75d8 100644 --- a/llvm/test/CodeGen/WebAssembly/returned.ll +++ b/llvm/test/CodeGen/WebAssembly/returned.ll @@ -80,3 +80,27 @@ define i32 @test_second_arg(i32 %a, i32 %b) { %call = call i32 @do_something_else(i32 %a, i32 %b) ret i32 %b } + +define void @test() { +; CHECK-LABEL: test: +; CHECK: .functype test () -> () +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: global.get $push0=, __stack_pointer +; CHECK-NEXT: i32.const $push1=, 16 +; CHECK-NEXT: i32.sub $push7=, $pop0, $pop1 +; CHECK-NEXT: local.tee $push6=, $0=, $pop7 +; CHECK-NEXT: global.set __stack_pointer, $pop6 +; CHECK-NEXT: i32.const $push4=, 12 +; CHECK-NEXT: i32.add $push5=, $0, $pop4 +; CHECK-NEXT: call $drop=, returns_arg, $pop5 +; CHECK-NEXT: i32.const $push2=, 16 +; CHECK-NEXT: i32.add $push3=, $0, $pop2 +; CHECK-NEXT: global.set __stack_pointer, $pop3 +; CHECK-NEXT: return +entry: + %a = alloca i32 + call void @llvm.lifetime.start.p0(i64 4, ptr %a) + %ret = call ptr @returns_arg(ptr %a) + call void @llvm.lifetime.end.p0(i64 4, ptr %a) + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll index e3607e1..36637e1 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -199,139 +199,17 @@ define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) { ; SIMD128-LABEL: mul_v16i8: ; SIMD128: .functype mul_v16i8 (v128, v128) -> (v128) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: i8x16.extract_lane_u $push4=, $0, 0 -; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $1, 0 -; SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; SIMD128-NEXT: i8x16.splat $push6=, $pop5 -; SIMD128-NEXT: i8x16.extract_lane_u $push1=, $0, 1 -; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $1, 1 -; SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; SIMD128-NEXT: i8x16.replace_lane $push7=, $pop6, 1, $pop2 -; SIMD128-NEXT: i8x16.extract_lane_u $push9=, $0, 2 -; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $1, 2 -; SIMD128-NEXT: i32.mul $push10=, $pop9, $pop8 -; SIMD128-NEXT: i8x16.replace_lane $push11=, $pop7, 2, $pop10 -; SIMD128-NEXT: i8x16.extract_lane_u $push13=, $0, 3 -; SIMD128-NEXT: i8x16.extract_lane_u $push12=, $1, 3 -; SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12 -; SIMD128-NEXT: i8x16.replace_lane $push15=, $pop11, 3, $pop14 -; SIMD128-NEXT: i8x16.extract_lane_u $push17=, $0, 4 -; SIMD128-NEXT: i8x16.extract_lane_u $push16=, $1, 4 -; SIMD128-NEXT: i32.mul $push18=, $pop17, $pop16 -; SIMD128-NEXT: i8x16.replace_lane $push19=, $pop15, 4, $pop18 -; SIMD128-NEXT: i8x16.extract_lane_u $push21=, $0, 5 -; SIMD128-NEXT: i8x16.extract_lane_u $push20=, $1, 5 -; SIMD128-NEXT: i32.mul $push22=, $pop21, $pop20 -; SIMD128-NEXT: i8x16.replace_lane $push23=, $pop19, 5, $pop22 -; SIMD128-NEXT: i8x16.extract_lane_u $push25=, $0, 6 -; SIMD128-NEXT: i8x16.extract_lane_u $push24=, $1, 6 -; SIMD128-NEXT: i32.mul $push26=, $pop25, $pop24 -; SIMD128-NEXT: i8x16.replace_lane $push27=, $pop23, 6, $pop26 -; SIMD128-NEXT: i8x16.extract_lane_u $push29=, $0, 7 -; SIMD128-NEXT: i8x16.extract_lane_u $push28=, $1, 7 -; SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28 -; SIMD128-NEXT: i8x16.replace_lane $push31=, $pop27, 7, $pop30 -; SIMD128-NEXT: i8x16.extract_lane_u $push33=, $0, 8 -; SIMD128-NEXT: i8x16.extract_lane_u $push32=, $1, 8 -; SIMD128-NEXT: i32.mul $push34=, $pop33, $pop32 -; SIMD128-NEXT: i8x16.replace_lane $push35=, $pop31, 8, $pop34 -; SIMD128-NEXT: i8x16.extract_lane_u $push37=, $0, 9 -; SIMD128-NEXT: i8x16.extract_lane_u $push36=, $1, 9 -; SIMD128-NEXT: i32.mul $push38=, $pop37, $pop36 -; SIMD128-NEXT: i8x16.replace_lane $push39=, $pop35, 9, $pop38 -; SIMD128-NEXT: i8x16.extract_lane_u $push41=, $0, 10 -; SIMD128-NEXT: i8x16.extract_lane_u $push40=, $1, 10 -; SIMD128-NEXT: i32.mul $push42=, $pop41, $pop40 -; SIMD128-NEXT: i8x16.replace_lane $push43=, $pop39, 10, $pop42 -; SIMD128-NEXT: i8x16.extract_lane_u $push45=, $0, 11 -; SIMD128-NEXT: i8x16.extract_lane_u $push44=, $1, 11 -; SIMD128-NEXT: i32.mul $push46=, $pop45, $pop44 -; SIMD128-NEXT: i8x16.replace_lane $push47=, $pop43, 11, $pop46 -; SIMD128-NEXT: i8x16.extract_lane_u $push49=, $0, 12 -; SIMD128-NEXT: i8x16.extract_lane_u $push48=, $1, 12 -; SIMD128-NEXT: i32.mul $push50=, $pop49, $pop48 -; SIMD128-NEXT: i8x16.replace_lane $push51=, $pop47, 12, $pop50 -; SIMD128-NEXT: i8x16.extract_lane_u $push53=, $0, 13 -; SIMD128-NEXT: i8x16.extract_lane_u $push52=, $1, 13 -; SIMD128-NEXT: i32.mul $push54=, $pop53, $pop52 -; SIMD128-NEXT: i8x16.replace_lane $push55=, $pop51, 13, $pop54 -; SIMD128-NEXT: i8x16.extract_lane_u $push57=, $0, 14 -; SIMD128-NEXT: i8x16.extract_lane_u $push56=, $1, 14 -; SIMD128-NEXT: i32.mul $push58=, $pop57, $pop56 -; SIMD128-NEXT: i8x16.replace_lane $push59=, $pop55, 14, $pop58 -; SIMD128-NEXT: i8x16.extract_lane_u $push61=, $0, 15 -; SIMD128-NEXT: i8x16.extract_lane_u $push60=, $1, 15 -; SIMD128-NEXT: i32.mul $push62=, $pop61, $pop60 -; SIMD128-NEXT: i8x16.replace_lane $push63=, $pop59, 15, $pop62 -; SIMD128-NEXT: return $pop63 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push1=, $0, $1 +; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push0=, $0, $1 +; SIMD128-NEXT: i8x16.shuffle $push2=, $pop1, $pop0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; SIMD128-NEXT: return $pop2 ; ; SIMD128-FAST-LABEL: mul_v16i8: ; SIMD128-FAST: .functype mul_v16i8 (v128, v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push5=, $0, 0 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push4=, $1, 0 -; SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 -; SIMD128-FAST-NEXT: i8x16.splat $push7=, $pop6 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push2=, $0, 1 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push1=, $1, 1 -; SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push8=, $pop7, 1, $pop3 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push10=, $0, 2 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push9=, $1, 2 -; SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push12=, $pop8, 2, $pop11 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push14=, $0, 3 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push13=, $1, 3 -; SIMD128-FAST-NEXT: i32.mul $push15=, $pop14, $pop13 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push16=, $pop12, 3, $pop15 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push18=, $0, 4 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push17=, $1, 4 -; SIMD128-FAST-NEXT: i32.mul $push19=, $pop18, $pop17 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push20=, $pop16, 4, $pop19 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push22=, $0, 5 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push21=, $1, 5 -; SIMD128-FAST-NEXT: i32.mul $push23=, $pop22, $pop21 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push24=, $pop20, 5, $pop23 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push26=, $0, 6 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push25=, $1, 6 -; SIMD128-FAST-NEXT: i32.mul $push27=, $pop26, $pop25 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push28=, $pop24, 6, $pop27 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push30=, $0, 7 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push29=, $1, 7 -; SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push32=, $pop28, 7, $pop31 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push34=, $0, 8 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push33=, $1, 8 -; SIMD128-FAST-NEXT: i32.mul $push35=, $pop34, $pop33 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push36=, $pop32, 8, $pop35 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push38=, $0, 9 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push37=, $1, 9 -; SIMD128-FAST-NEXT: i32.mul $push39=, $pop38, $pop37 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push40=, $pop36, 9, $pop39 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push42=, $0, 10 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push41=, $1, 10 -; SIMD128-FAST-NEXT: i32.mul $push43=, $pop42, $pop41 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push44=, $pop40, 10, $pop43 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push46=, $0, 11 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push45=, $1, 11 -; SIMD128-FAST-NEXT: i32.mul $push47=, $pop46, $pop45 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push48=, $pop44, 11, $pop47 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push50=, $0, 12 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push49=, $1, 12 -; SIMD128-FAST-NEXT: i32.mul $push51=, $pop50, $pop49 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push52=, $pop48, 12, $pop51 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push54=, $0, 13 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push53=, $1, 13 -; SIMD128-FAST-NEXT: i32.mul $push55=, $pop54, $pop53 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push56=, $pop52, 13, $pop55 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push58=, $0, 14 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push57=, $1, 14 -; SIMD128-FAST-NEXT: i32.mul $push59=, $pop58, $pop57 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push60=, $pop56, 14, $pop59 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push62=, $0, 15 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push61=, $1, 15 -; SIMD128-FAST-NEXT: i32.mul $push63=, $pop62, $pop61 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push0=, $pop60, 15, $pop63 +; SIMD128-FAST-NEXT: i16x8.extmul_low_i8x16_u $push2=, $0, $1 +; SIMD128-FAST-NEXT: i16x8.extmul_high_i8x16_u $push1=, $0, $1 +; SIMD128-FAST-NEXT: i8x16.shuffle $push0=, $pop2, $pop1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 ; SIMD128-FAST-NEXT: return $pop0 ; ; NO-SIMD128-LABEL: mul_v16i8: diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll index 8459ec8..b355a0d 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll @@ -441,3 +441,31 @@ define <2 x double> @promote_mixed_v2f64(<4 x float> %x, <4 x float> %y) { %a = fpext <2 x float> %v to <2 x double> ret <2 x double> %a } + +define <4 x float> @convert_u_v4f32_maybeneg(<4 x i32> %x) { +; CHECK-LABEL: convert_u_v4f32_maybeneg: +; CHECK: .functype convert_u_v4f32_maybeneg (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 1 +; CHECK-NEXT: i32x4.shr_s +; CHECK-NEXT: f32x4.convert_i32x4_u +; CHECK-NEXT: # fallthrough-return + %a = ashr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> + %b = uitofp <4 x i32> %a to <4 x float> + ret <4 x float> %b +} + +define <4 x float> @convert_u_v4f32_nonneg(<4 x i32> %x) { +; CHECK-LABEL: convert_u_v4f32_nonneg: +; CHECK: .functype convert_u_v4f32_nonneg (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 1 +; CHECK-NEXT: i32x4.shr_u +; CHECK-NEXT: f32x4.convert_i32x4_s +; CHECK-NEXT: # fallthrough-return + %a = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> + %b = uitofp <4 x i32> %a to <4 x float> + ret <4 x float> %b +} diff --git a/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll b/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll index c93b8aa..eb39f90 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-extending-convert.ll @@ -12,7 +12,7 @@ define <4 x float> @extend_to_float_low_i16x8_u(<8 x i16> %x) { ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32x4.extend_low_i16x8_u -; CHECK-NEXT: f32x4.convert_i32x4_u +; CHECK-NEXT: f32x4.convert_i32x4_s ; CHECK-NEXT: # fallthrough-return %low = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %extended = uitofp <4 x i16> %low to <4 x float> @@ -25,7 +25,7 @@ define <4 x float> @extend_to_float_high_i16x8_u(<8 x i16> %x) { ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32x4.extend_high_i16x8_u -; CHECK-NEXT: f32x4.convert_i32x4_u +; CHECK-NEXT: f32x4.convert_i32x4_s ; CHECK-NEXT: # fallthrough-return %high = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %extended = uitofp <4 x i16> %high to <4 x float> @@ -39,7 +39,7 @@ define <4 x float> @extend_to_float_low_i8x16_u(<8 x i8> %x) { ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i16x8.extend_low_i8x16_u ; CHECK-NEXT: i32x4.extend_low_i16x8_u -; CHECK-NEXT: f32x4.convert_i32x4_u +; CHECK-NEXT: f32x4.convert_i32x4_s ; CHECK-NEXT: # fallthrough-return %low = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> %extended = uitofp <4 x i8> %low to <4 x float> @@ -55,7 +55,7 @@ define <4 x float> @extend_to_float_high_i8x16_u(<8 x i8> %x) { ; CHECK-NEXT: i8x16.shuffle 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK-NEXT: i16x8.extend_low_i8x16_u ; CHECK-NEXT: i32x4.extend_low_i16x8_u -; CHECK-NEXT: f32x4.convert_i32x4_u +; CHECK-NEXT: f32x4.convert_i32x4_s ; CHECK-NEXT: # fallthrough-return %high = shufflevector <8 x i8> %x, <8 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %extended = uitofp <4 x i8> %high to <4 x float> @@ -136,7 +136,7 @@ define <2 x double> @extend_to_double_low_i16x4_u(<4 x i16> %x) { ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32x4.extend_low_i16x8_u -; CHECK-NEXT: f64x2.convert_low_i32x4_u +; CHECK-NEXT: f64x2.convert_low_i32x4_s ; CHECK-NEXT: # fallthrough-return %low = shufflevector <4 x i16> %x, <4 x i16> undef, <2 x i32> <i32 0, i32 1> %extended = uitofp <2 x i16> %low to <2 x double> diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll new file mode 100644 index 0000000..6e2d860 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll @@ -0,0 +1,145 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s --check-prefix=RELAXED +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128, | FileCheck %s --check-prefix=STRICT + +target triple = "wasm32" + +define double @fsub_fmul_contract_f64(double %a, double %b, double %c) { +; RELAXED-LABEL: fsub_fmul_contract_f64: +; RELAXED: .functype fsub_fmul_contract_f64 (f64, f64, f64) -> (f64) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64.mul $push0=, $1, $0 +; RELAXED-NEXT: f64.sub $push1=, $2, $pop0 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fsub_fmul_contract_f64: +; STRICT: .functype fsub_fmul_contract_f64 (f64, f64, f64) -> (f64) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64.mul $push0=, $1, $0 +; STRICT-NEXT: f64.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract double %b, %a + %sub = fsub contract double %c, %mul + ret double %sub +} + +define <4 x float> @fsub_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; RELAXED-LABEL: fsub_fmul_contract_4xf32: +; RELAXED: .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fsub_fmul_contract_4xf32: +; STRICT: .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32x4.mul $push0=, $1, $0 +; STRICT-NEXT: f32x4.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract <4 x float> %b, %a + %sub = fsub contract <4 x float> %c, %mul + ret <4 x float> %sub +} + + +define <8 x half> @fsub_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; RELAXED-LABEL: fsub_fmul_contract_8xf16: +; RELAXED: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fsub_fmul_contract_8xf16: +; STRICT: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f16x8.mul $push0=, $1, $0 +; STRICT-NEXT: f16x8.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract <8 x half> %b, %a + %sub = fsub contract <8 x half> %c, %mul + ret <8 x half> %sub +} + + +define <4 x float> @fsub_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; RELAXED-LABEL: fsub_fmul_4xf32: +; RELAXED: .functype fsub_fmul_4xf32 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32x4.mul $push0=, $1, $0 +; RELAXED-NEXT: f32x4.sub $push1=, $2, $pop0 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fsub_fmul_4xf32: +; STRICT: .functype fsub_fmul_4xf32 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32x4.mul $push0=, $1, $0 +; STRICT-NEXT: f32x4.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul <4 x float> %b, %a + %sub = fsub contract <4 x float> %c, %mul + ret <4 x float> %sub +} + +define <8 x float> @fsub_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8 x float> %c) { +; RELAXED-LABEL: fsub_fmul_contract_8xf32: +; RELAXED: .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $6, $4, $2 +; RELAXED-NEXT: v128.store 16($0), $pop0 +; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $5, $3, $1 +; RELAXED-NEXT: v128.store 0($0), $pop1 +; RELAXED-NEXT: return +; +; STRICT-LABEL: fsub_fmul_contract_8xf32: +; STRICT: .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32x4.mul $push0=, $4, $2 +; STRICT-NEXT: f32x4.sub $push1=, $6, $pop0 +; STRICT-NEXT: v128.store 16($0), $pop1 +; STRICT-NEXT: f32x4.mul $push2=, $3, $1 +; STRICT-NEXT: f32x4.sub $push3=, $5, $pop2 +; STRICT-NEXT: v128.store 0($0), $pop3 +; STRICT-NEXT: return + %mul = fmul contract <8 x float> %b, %a + %sub = fsub contract <8 x float> %c, %mul + ret <8 x float> %sub +} + + +define <2 x double> @fsub_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; RELAXED-LABEL: fsub_fmul_contract_2xf64: +; RELAXED: .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fsub_fmul_contract_2xf64: +; STRICT: .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64x2.mul $push0=, $1, $0 +; STRICT-NEXT: f64x2.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract <2 x double> %b, %a + %sub = fsub contract <2 x double> %c, %mul + ret <2 x double> %sub +} + +define float @fsub_fmul_contract_f32(float %a, float %b, float %c) { +; RELAXED-LABEL: fsub_fmul_contract_f32: +; RELAXED: .functype fsub_fmul_contract_f32 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32.mul $push0=, $1, $0 +; RELAXED-NEXT: f32.sub $push1=, $2, $pop0 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fsub_fmul_contract_f32: +; STRICT: .functype fsub_fmul_contract_f32 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32.mul $push0=, $1, $0 +; STRICT-NEXT: f32.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract float %b, %a + %sub = fsub contract float %c, %mul + ret float %sub +} + diff --git a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll index 1c77ad5..60cfc27 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll @@ -70,9 +70,9 @@ target triple = "wasm32-unknown-unknown" ; +call-indirect-overlong, +exception-handling, ; +extended-const, +fp16, +multimemory, +multivalue, ; +mutable-globals, +nontrapping-fptoint, +relaxed-simd, -; +reference-types, +simd128, +sign-ext, +tail-call +; +reference-types, +simd128, +sign-ext, +tail-call, +gc ; BLEEDING-EDGE-LABEL: .section .custom_section.target_features,"",@ -; BLEEDING-EDGE-NEXT: .int8 16 +; BLEEDING-EDGE-NEXT: .int8 17 ; BLEEDING-EDGE-NEXT: .int8 43 ; BLEEDING-EDGE-NEXT: .int8 7 ; BLEEDING-EDGE-NEXT: .ascii "atomics" @@ -95,6 +95,9 @@ target triple = "wasm32-unknown-unknown" ; BLEEDING-EDGE-NEXT: .int8 4 ; BLEEDING-EDGE-NEXT: .ascii "fp16" ; BLEEDING-EDGE-NEXT: .int8 43 +; BLEEDING-EDGE-NEXT: .int8 2 +; BLEEDING-EDGE-NEXT: .ascii "gc" +; BLEEDING-EDGE-NEXT: .int8 43 ; BLEEDING-EDGE-NEXT: .int8 11 ; BLEEDING-EDGE-NEXT: .ascii "multimemory" ; BLEEDING-EDGE-NEXT: .int8 43 diff --git a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll index 1d194b6..4c30a3a 100644 --- a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll +++ b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll @@ -116,40 +116,28 @@ define i8 @pairwise_mul_v16i8(<16 x i8> %arg) { ; SIMD128-LABEL: pairwise_mul_v16i8: ; SIMD128: .functype pairwise_mul_v16i8 (v128) -> (i32) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: i8x16.extract_lane_u $push26=, $0, 0 -; SIMD128-NEXT: i8x16.shuffle $push32=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 -; SIMD128-NEXT: local.tee $push31=, $1=, $pop32 -; SIMD128-NEXT: i8x16.extract_lane_u $push25=, $pop31, 0 -; SIMD128-NEXT: i32.mul $push27=, $pop26, $pop25 -; SIMD128-NEXT: i8x16.extract_lane_u $push23=, $0, 4 -; SIMD128-NEXT: i8x16.extract_lane_u $push22=, $1, 4 -; SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22 -; SIMD128-NEXT: i32.mul $push28=, $pop27, $pop24 -; SIMD128-NEXT: i8x16.extract_lane_u $push19=, $0, 2 -; SIMD128-NEXT: i8x16.extract_lane_u $push18=, $1, 2 -; SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18 -; SIMD128-NEXT: i8x16.extract_lane_u $push16=, $0, 6 -; SIMD128-NEXT: i8x16.extract_lane_u $push15=, $1, 6 -; SIMD128-NEXT: i32.mul $push17=, $pop16, $pop15 -; SIMD128-NEXT: i32.mul $push21=, $pop20, $pop17 -; SIMD128-NEXT: i32.mul $push29=, $pop28, $pop21 -; SIMD128-NEXT: i8x16.extract_lane_u $push11=, $0, 1 -; SIMD128-NEXT: i8x16.extract_lane_u $push10=, $1, 1 -; SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 -; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $0, 5 -; SIMD128-NEXT: i8x16.extract_lane_u $push7=, $1, 5 -; SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 -; SIMD128-NEXT: i32.mul $push13=, $pop12, $pop9 -; SIMD128-NEXT: i8x16.extract_lane_u $push4=, $0, 3 -; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $1, 3 -; SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; SIMD128-NEXT: i8x16.extract_lane_u $push1=, $0, 7 -; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $1, 7 -; SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; SIMD128-NEXT: i32.mul $push6=, $pop5, $pop2 -; SIMD128-NEXT: i32.mul $push14=, $pop13, $pop6 -; SIMD128-NEXT: i32.mul $push30=, $pop29, $pop14 -; SIMD128-NEXT: return $pop30 +; SIMD128-NEXT: i8x16.shuffle $push20=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 +; SIMD128-NEXT: local.tee $push19=, $1=, $pop20 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push1=, $0, $pop19 +; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push0=, $0, $1 +; SIMD128-NEXT: i8x16.shuffle $push18=, $pop1, $pop0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; SIMD128-NEXT: local.tee $push17=, $0=, $pop18 +; SIMD128-NEXT: i8x16.shuffle $push16=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; SIMD128-NEXT: local.tee $push15=, $1=, $pop16 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push3=, $pop17, $pop15 +; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push2=, $0, $1 +; SIMD128-NEXT: i8x16.shuffle $push14=, $pop3, $pop2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; SIMD128-NEXT: local.tee $push13=, $0=, $pop14 +; SIMD128-NEXT: i8x16.shuffle $push12=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; SIMD128-NEXT: local.tee $push11=, $1=, $pop12 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push5=, $pop13, $pop11 +; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push4=, $0, $1 +; SIMD128-NEXT: i8x16.shuffle $push10=, $pop5, $pop4, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; SIMD128-NEXT: local.tee $push9=, $0=, $pop10 +; SIMD128-NEXT: i8x16.shuffle $push6=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push7=, $pop9, $pop6 +; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $pop7, 0 +; SIMD128-NEXT: return $pop8 %res = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %arg) ret i8 %res } |