diff options
Diffstat (limited to 'llvm/test')
28 files changed, 1695 insertions, 2765 deletions
diff --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll index b2635d3..3685e9c 100644 --- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll +++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll @@ -730,111 +730,6 @@ entry: ret void } -define void @store_factor8(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3, - <4 x i32> %a4, <4 x i32> %a5, <4 x i32> %a6, <4 x i32> %a7) { -; CHECK-LABEL: store_factor8: -; CHECK: .Lfunc_begin17: -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: // %bb.0: -; CHECK: zip1 [[V1:.*s]], [[I1:.*s]], [[I5:.*s]] -; CHECK-NEXT: zip2 [[V5:.*s]], [[I1]], [[I5]] -; CHECK-NEXT: zip1 [[V2:.*s]], [[I2:.*s]], [[I6:.*s]] -; CHECK-NEXT: zip2 [[V6:.*s]], [[I2]], [[I6]] -; CHECK-NEXT: zip1 [[V3:.*s]], [[I3:.*s]], [[I7:.*s]] -; CHECK-NEXT: zip2 [[V7:.*s]], [[I3]], [[I7]] -; CHECK-NEXT: zip1 [[V4:.*s]], [[I4:.*s]], [[I8:.*s]] -; CHECK-NEXT: zip2 [[V8:.*s]], [[I4]], [[I8]] -; CHECK-NEXT: st4 { [[V1]], [[V2]], [[V3]], [[V4]] }, [x0], #64 -; CHECK-NEXT: st4 { [[V5]], [[V6]], [[V7]], [[V8]] }, [x0] -; CHECK-NEXT: ret - - %v0 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %v1 = shufflevector <4 x i32> %a2, <4 x i32> %a3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %v2 = shufflevector <4 x i32> %a4, <4 x i32> %a5, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %v3 = shufflevector <4 x i32> %a6, <4 x i32> %a7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - - %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %s1 = shufflevector <8 x i32> %v2, <8 x i32> %v3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - - %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> - store <32 x i32> %interleaved.vec, ptr %ptr, align 4 - ret void -} - -define void @store_factor16(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3, - <4 x i32> %a4, <4 x i32> %a5, <4 x i32> %a6, <4 x i32> %a7, - <4 x i32> %a8, <4 x i32> %a9, <4 x i32> %a10, <4 x i32> %a11, - <4 x i32> %a12, <4 x i32> %a13, <4 x i32> %a14, <4 x i32> %a15) { -; CHECK-LABEL: store_factor16: -; CHECK: .Lfunc_begin18: -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: // %bb.0: -; CHECK: zip1 [[V05:.*s]], [[I05:.*s]], [[I13:.*s]] -; CHECK-NEXT: zip1 [[V01:.*s]], [[I01:.*s]], [[I09:.*s]] -; CHECK-NEXT: zip1 [[V02:.*s]], [[I02:.*s]], [[I10:.*s]] -; CHECK-NEXT: zip1 [[V06:.*s]], [[I06:.*s]], [[I14:.*s]] -; CHECK-NEXT: zip1 [[V07:.*s]], [[I07:.*s]], [[I15:.*s]] -; CHECK-NEXT: zip2 [[V09:.*s]], [[I01]], [[I09]] -; CHECK-NEXT: zip2 [[V13:.*s]], [[I05]], [[I13]] -; CHECK-NEXT: zip1 [[V03:.*s]], [[I03:.*s]], [[I11:.*s]] -; CHECK-NEXT: zip1 [[V04:.*s]], [[I04:.*s]], [[I12:.*s]] -; CHECK-NEXT: zip1 [[V08:.*s]], [[I08:.*s]], [[I16:.*s]] -; CHECK-NEXT: zip2 [[V10:.*s]], [[I02]], [[I10]] -; CHECK-NEXT: zip2 [[V14:.*s]], [[I06]], [[I14]] -; CHECK-NEXT: zip2 [[V11:.*s]], [[I03]], [[I11]] -; CHECK-NEXT: zip1 [[V17:.*s]], [[V01]], [[V05]] -; CHECK-NEXT: zip2 [[V15:.*s]], [[I07]], [[I15]] -; CHECK-NEXT: zip2 [[V21:.*s]], [[V01]], [[V05]] -; CHECK-NEXT: zip1 [[V18:.*s]], [[V02]], [[V06]] -; CHECK-NEXT: zip2 [[V12:.*s]], [[I04]], [[I12]] -; CHECK-NEXT: zip2 [[V16:.*s]], [[I08]], [[I16]] -; CHECK-NEXT: zip1 [[V19:.*s]], [[V03]], [[V07]] -; CHECK-NEXT: zip2 [[V22:.*s]], [[V02]], [[V06]] -; CHECK-NEXT: zip1 [[V25:.*s]], [[V09]], [[V13]] -; CHECK-NEXT: zip1 [[V20:.*s]], [[V04]], [[V08]] -; CHECK-NEXT: zip2 [[V23:.*s]], [[V03]], [[V07]] -; CHECK-NEXT: zip1 [[V26:.*s]], [[V10]], [[V14]] -; CHECK-NEXT: zip2 [[V29:.*s]], [[V09]], [[V13]] -; CHECK-NEXT: zip2 [[V24:.*s]], [[V04]], [[V08]] -; CHECK-NEXT: zip1 [[V27:.*s]], [[V11]], [[V15]] -; CHECK-NEXT: zip2 [[V30:.*s]], [[V10]], [[V14]] -; CHECK-NEXT: zip1 [[V28:.*s]], [[V12]], [[V16]] -; CHECK-NEXT: zip2 [[V31:.*s]], [[V11]], [[V15]] -; CHECK-NEXT: zip2 [[V32:.*s]], [[V12]], [[V16]] -; CHECK-NEXT: st4 { [[V17]], [[V18]], [[V19]], [[V20]] }, [x8], #64 -; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: st4 { [[V21]], [[V22]], [[V23]], [[V24]] }, [x8] -; CHECK-NEXT: add x8, x0, #128 -; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: st4 { [[V25]], [[V26]], [[V27]], [[V28]] }, [x8] -; CHECK-NEXT: add x8, x0, #192 -; CHECK-NEXT: st4 { [[V29]], [[V30]], [[V31]], [[V32]] }, [x8] -; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload -; CHECK-NEXT: ret - - %v0 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %v1 = shufflevector <4 x i32> %a2, <4 x i32> %a3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %v2 = shufflevector <4 x i32> %a4, <4 x i32> %a5, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %v3 = shufflevector <4 x i32> %a6, <4 x i32> %a7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %v4 = shufflevector <4 x i32> %a8, <4 x i32> %a9, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %v5 = shufflevector <4 x i32> %a10, <4 x i32> %a11, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %v6 = shufflevector <4 x i32> %a12, <4 x i32> %a13, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %v7 = shufflevector <4 x i32> %a14, <4 x i32> %a15, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - - %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %s1 = shufflevector <8 x i32> %v2, <8 x i32> %v3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %s2 = shufflevector <8 x i32> %v4, <8 x i32> %v5, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - %s3 = shufflevector <8 x i32> %v6, <8 x i32> %v7, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> - - %d0 = shufflevector <16 x i32> %s0, <16 x i32> %s1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> - %d1 = shufflevector <16 x i32> %s2, <16 x i32> %s3, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> - - %interleaved.vec = shufflevector <32 x i32> %d0, <32 x i32> %d1, <64 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60, i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63> - store <64 x i32> %interleaved.vec, ptr %ptr, align 4 - ret void -} - declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.dbg.cu = !{!0} diff --git a/llvm/test/CodeGen/ARM/ldexp-fp128.ll b/llvm/test/CodeGen/ARM/ldexp-fp128.ll new file mode 100644 index 0000000..93fcd39e8 --- /dev/null +++ b/llvm/test/CodeGen/ARM/ldexp-fp128.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=armv7-unknown-linux < %s | FileCheck -check-prefix=LINUX %s + +define fp128 @testExpl(fp128 %val, i32 %a) { +; LINUX-LABEL: testExpl: +; LINUX: @ %bb.0: +; LINUX-NEXT: push {r11, lr} +; LINUX-NEXT: sub sp, sp, #8 +; LINUX-NEXT: ldr r12, [sp, #16] +; LINUX-NEXT: str r12, [sp] +; LINUX-NEXT: bl ldexpl +; LINUX-NEXT: add sp, sp, #8 +; LINUX-NEXT: pop {r11, pc} + %call = tail call fp128 @ldexpl(fp128 %val, i32 %a) + ret fp128 %call +} + +declare fp128 @ldexpl(fp128, i32) memory(none) + +define fp128 @test_ldexp_f128_i32(fp128 %val, i32 %a) { +; LINUX-LABEL: test_ldexp_f128_i32: +; LINUX: @ %bb.0: +; LINUX-NEXT: push {r11, lr} +; LINUX-NEXT: sub sp, sp, #8 +; LINUX-NEXT: ldr r12, [sp, #16] +; LINUX-NEXT: str r12, [sp] +; LINUX-NEXT: bl ldexpl +; LINUX-NEXT: add sp, sp, #8 +; LINUX-NEXT: pop {r11, pc} + %call = tail call fp128 @llvm.ldexp.f128.i32(fp128 %val, i32 %a) + ret fp128 %call +} + +define <2 x fp128> @test_ldexp_v2f128_v2i32(<2 x fp128> %val, <2 x i32> %a) { +; LINUX-LABEL: test_ldexp_v2f128_v2i32: +; LINUX: @ %bb.0: +; LINUX-NEXT: push {r4, r5, r6, lr} +; LINUX-NEXT: vpush {d8} +; LINUX-NEXT: sub sp, sp, #8 +; LINUX-NEXT: mov r5, r3 +; LINUX-NEXT: add r3, sp, #40 +; LINUX-NEXT: mov r6, r2 +; LINUX-NEXT: mov r4, r0 +; LINUX-NEXT: ldm r3, {r0, r1, r2, r3} +; LINUX-NEXT: vldr d8, [sp, #56] +; LINUX-NEXT: vst1.32 {d8[1]}, [sp:32] +; LINUX-NEXT: bl ldexpl +; LINUX-NEXT: ldr r12, [sp, #32] +; LINUX-NEXT: vst1.32 {d8[0]}, [sp:32] +; LINUX-NEXT: ldr lr, [sp, #36] +; LINUX-NEXT: str r0, [r4, #16] +; LINUX-NEXT: mov r0, r6 +; LINUX-NEXT: str r1, [r4, #20] +; LINUX-NEXT: mov r1, r5 +; LINUX-NEXT: str r2, [r4, #24] +; LINUX-NEXT: mov r2, r12 +; LINUX-NEXT: str r3, [r4, #28] +; LINUX-NEXT: mov r3, lr +; LINUX-NEXT: bl ldexpl +; LINUX-NEXT: stm r4, {r0, r1, r2, r3} +; LINUX-NEXT: add sp, sp, #8 +; LINUX-NEXT: vpop {d8} +; LINUX-NEXT: pop {r4, r5, r6, pc} + %call = tail call <2 x fp128> @llvm.ldexp.v2f128.v2i32(<2 x fp128> %val, <2 x i32> %a) + ret <2 x fp128> %call +} diff --git a/llvm/test/CodeGen/LoongArch/ldptr.ll b/llvm/test/CodeGen/LoongArch/ldptr.ll index c3656a6..9bafa10 100644 --- a/llvm/test/CodeGen/LoongArch/ldptr.ll +++ b/llvm/test/CodeGen/LoongArch/ldptr.ll @@ -24,8 +24,7 @@ define signext i32 @ldptr_w(ptr %p) nounwind { ; LA32-LABEL: ldptr_w: ; LA32: # %bb.0: # %entry ; LA32-NEXT: addi.w $a0, $a0, 2047 -; LA32-NEXT: addi.w $a0, $a0, 1 -; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: ld.w $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: ldptr_w: @@ -81,10 +80,9 @@ entry: define i64 @ldptr_d(ptr %p) nounwind { ; LA32-LABEL: ldptr_d: ; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $a0, $a0, 2047 -; LA32-NEXT: addi.w $a1, $a0, 1 -; LA32-NEXT: ld.w $a0, $a1, 0 -; LA32-NEXT: ld.w $a1, $a1, 4 +; LA32-NEXT: addi.w $a1, $a0, 2047 +; LA32-NEXT: ld.w $a0, $a1, 1 +; LA32-NEXT: ld.w $a1, $a1, 5 ; LA32-NEXT: ret ; ; LA64-LABEL: ldptr_d: diff --git a/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll index 9a806a1..93f73e5 100644 --- a/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll +++ b/llvm/test/CodeGen/LoongArch/sink-fold-addi.ll @@ -25,14 +25,13 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: move $s1, $a2 ; LA32-NEXT: slli.w $a1, $a0, 4 ; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 -; LA32-NEXT: add.w $a0, $a4, $a0 ; LA32-NEXT: sltui $a1, $a3, 1 ; LA32-NEXT: slti $a2, $a3, 0 ; LA32-NEXT: masknez $a2, $a2, $a1 ; LA32-NEXT: sltui $a3, $s1, 1 ; LA32-NEXT: maskeqz $a1, $a3, $a1 ; LA32-NEXT: or $a1, $a1, $a2 -; LA32-NEXT: addi.w $s2, $a0, 8 +; LA32-NEXT: add.w $s2, $a4, $a0 ; LA32-NEXT: bnez $a1, .LBB0_3 ; LA32-NEXT: # %bb.1: # %for.body.preheader ; LA32-NEXT: move $fp, $a4 @@ -45,8 +44,8 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl f -; LA32-NEXT: ld.w $a0, $s2, 4 -; LA32-NEXT: ld.w $a1, $s2, 0 +; LA32-NEXT: ld.w $a0, $s2, 12 +; LA32-NEXT: ld.w $a1, $s2, 8 ; LA32-NEXT: add.w $a0, $a0, $s6 ; LA32-NEXT: add.w $s3, $a1, $s3 ; LA32-NEXT: sltu $a1, $s3, $a1 @@ -63,8 +62,8 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: move $s3, $zero ; LA32-NEXT: move $s6, $zero ; LA32-NEXT: .LBB0_4: # %for.cond.cleanup -; LA32-NEXT: st.w $s3, $s2, 0 -; LA32-NEXT: st.w $s6, $s2, 4 +; LA32-NEXT: st.w $s3, $s2, 8 +; LA32-NEXT: st.w $s6, $s2, 12 ; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload @@ -88,8 +87,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $s0, $a1 ; LA64-NEXT: slli.d $a1, $a0, 4 ; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: addi.d $s1, $a0, 8 +; LA64-NEXT: add.d $s1, $a2, $a0 ; LA64-NEXT: blez $s0, .LBB0_3 ; LA64-NEXT: # %bb.1: # %for.body.preheader ; LA64-NEXT: move $fp, $a2 @@ -100,7 +98,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(f) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: ld.d $a0, $s1, 0 +; LA64-NEXT: ld.d $a0, $s1, 8 ; LA64-NEXT: addi.d $s0, $s0, -1 ; LA64-NEXT: add.d $s2, $a0, $s2 ; LA64-NEXT: bnez $s0, .LBB0_2 @@ -108,7 +106,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: .LBB0_3: ; LA64-NEXT: move $s2, $zero ; LA64-NEXT: .LBB0_4: # %for.cond.cleanup -; LA64-NEXT: st.d $s2, $s1, 0 +; LA64-NEXT: st.d $s2, $s1, 8 ; LA64-NEXT: ld.d $s2, $sp, 8 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload @@ -153,14 +151,13 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: move $s1, $a2 ; LA32-NEXT: slli.w $a1, $a0, 4 ; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 -; LA32-NEXT: add.w $a0, $a4, $a0 ; LA32-NEXT: sltui $a1, $a3, 1 ; LA32-NEXT: slti $a2, $a3, 0 ; LA32-NEXT: masknez $a2, $a2, $a1 ; LA32-NEXT: sltui $a3, $s1, 1 ; LA32-NEXT: maskeqz $a1, $a3, $a1 ; LA32-NEXT: or $a1, $a1, $a2 -; LA32-NEXT: addi.w $s2, $a0, 16 +; LA32-NEXT: add.w $s2, $a4, $a0 ; LA32-NEXT: bnez $a1, .LBB1_3 ; LA32-NEXT: # %bb.1: # %for.body.preheader ; LA32-NEXT: move $fp, $a4 @@ -172,7 +169,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: # =>This Inner Loop Header: Depth=1 ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl f -; LA32-NEXT: fld.s $fa0, $s2, 0 +; LA32-NEXT: fld.s $fa0, $s2, 16 ; LA32-NEXT: addi.w $s3, $s3, 1 ; LA32-NEXT: sltui $a0, $s3, 1 ; LA32-NEXT: add.w $s4, $s4, $a0 @@ -185,7 +182,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: .LBB1_3: ; LA32-NEXT: movgr2fr.w $fs0, $zero ; LA32-NEXT: .LBB1_4: # %for.cond.cleanup -; LA32-NEXT: fst.s $fs0, $s2, 0 +; LA32-NEXT: fst.s $fs0, $s2, 16 ; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload ; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload @@ -208,8 +205,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $s0, $a1 ; LA64-NEXT: slli.d $a1, $a0, 4 ; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: addi.d $s1, $a0, 16 +; LA64-NEXT: add.d $s1, $a2, $a0 ; LA64-NEXT: blez $s0, .LBB1_3 ; LA64-NEXT: # %bb.1: # %for.body.preheader ; LA64-NEXT: move $fp, $a2 @@ -220,7 +216,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(f) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: fld.s $fa0, $s1, 0 +; LA64-NEXT: fld.s $fa0, $s1, 16 ; LA64-NEXT: addi.d $s0, $s0, -1 ; LA64-NEXT: fadd.s $fs0, $fa0, $fs0 ; LA64-NEXT: bnez $s0, .LBB1_2 @@ -228,7 +224,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: .LBB1_3: ; LA64-NEXT: movgr2fr.w $fs0, $zero ; LA64-NEXT: .LBB1_4: # %for.cond.cleanup -; LA64-NEXT: fst.s $fs0, $s1, 0 +; LA64-NEXT: fst.s $fs0, $s1, 16 ; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload @@ -271,14 +267,13 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: move $s0, $a3 ; LA32-NEXT: move $s1, $a2 ; LA32-NEXT: slli.w $a0, $a0, 6 -; LA32-NEXT: add.w $a0, $a4, $a0 ; LA32-NEXT: sltui $a1, $a3, 1 ; LA32-NEXT: slti $a2, $a3, 0 ; LA32-NEXT: masknez $a2, $a2, $a1 ; LA32-NEXT: sltui $a3, $s1, 1 ; LA32-NEXT: maskeqz $a1, $a3, $a1 ; LA32-NEXT: or $a1, $a1, $a2 -; LA32-NEXT: addi.w $s2, $a0, 16 +; LA32-NEXT: add.w $s2, $a4, $a0 ; LA32-NEXT: bnez $a1, .LBB2_3 ; LA32-NEXT: # %bb.1: # %for.body.preheader ; LA32-NEXT: move $fp, $a4 @@ -291,7 +286,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl f -; LA32-NEXT: vld $vr0, $s2, 0 +; LA32-NEXT: vld $vr0, $s2, 16 ; LA32-NEXT: addi.w $s3, $s3, 1 ; LA32-NEXT: sltui $a0, $s3, 1 ; LA32-NEXT: add.w $s4, $s4, $a0 @@ -307,7 +302,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: .LBB2_3: ; LA32-NEXT: vrepli.b $vr0, 0 ; LA32-NEXT: .LBB2_4: # %for.cond.cleanup -; LA32-NEXT: vst $vr0, $s2, 0 +; LA32-NEXT: vst $vr0, $s2, 16 ; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload @@ -326,8 +321,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill ; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill ; LA64-NEXT: slli.d $a0, $a0, 6 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: addi.d $s1, $a0, 16 +; LA64-NEXT: add.d $s1, $a2, $a0 ; LA64-NEXT: blez $a1, .LBB2_3 ; LA64-NEXT: # %bb.1: # %for.body.preheader ; LA64-NEXT: move $fp, $a2 @@ -340,7 +334,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(f) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: vld $vr0, $s1, 0 +; LA64-NEXT: vld $vr0, $s1, 16 ; LA64-NEXT: addi.d $s0, $s0, -1 ; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload ; LA64-NEXT: vadd.w $vr1, $vr0, $vr1 @@ -351,7 +345,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: .LBB2_3: ; LA64-NEXT: vrepli.b $vr0, 0 ; LA64-NEXT: .LBB2_4: # %for.cond.cleanup -; LA64-NEXT: vst $vr0, $s1, 0 +; LA64-NEXT: vst $vr0, $s1, 16 ; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload ; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload @@ -393,14 +387,13 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: move $s0, $a3 ; LA32-NEXT: move $s1, $a2 ; LA32-NEXT: slli.w $a0, $a0, 6 -; LA32-NEXT: add.w $a0, $a4, $a0 ; LA32-NEXT: sltui $a1, $a3, 1 ; LA32-NEXT: slti $a2, $a3, 0 ; LA32-NEXT: masknez $a2, $a2, $a1 ; LA32-NEXT: sltui $a3, $s1, 1 ; LA32-NEXT: maskeqz $a1, $a3, $a1 ; LA32-NEXT: or $a1, $a1, $a2 -; LA32-NEXT: addi.w $s2, $a0, 32 +; LA32-NEXT: add.w $s2, $a4, $a0 ; LA32-NEXT: bnez $a1, .LBB3_3 ; LA32-NEXT: # %bb.1: # %for.body.preheader ; LA32-NEXT: move $fp, $a4 @@ -413,7 +406,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl f -; LA32-NEXT: xvld $xr0, $s2, 0 +; LA32-NEXT: xvld $xr0, $s2, 32 ; LA32-NEXT: addi.w $s3, $s3, 1 ; LA32-NEXT: sltui $a0, $s3, 1 ; LA32-NEXT: add.w $s4, $s4, $a0 @@ -429,7 +422,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: .LBB3_3: ; LA32-NEXT: xvrepli.b $xr0, 0 ; LA32-NEXT: .LBB3_4: # %for.cond.cleanup -; LA32-NEXT: xvst $xr0, $s2, 0 +; LA32-NEXT: xvst $xr0, $s2, 32 ; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload @@ -448,8 +441,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill ; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill ; LA64-NEXT: slli.d $a0, $a0, 6 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: addi.d $s1, $a0, 32 +; LA64-NEXT: add.d $s1, $a2, $a0 ; LA64-NEXT: blez $a1, .LBB3_3 ; LA64-NEXT: # %bb.1: # %for.body.preheader ; LA64-NEXT: move $fp, $a2 @@ -462,7 +454,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(f) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: xvld $xr0, $s1, 0 +; LA64-NEXT: xvld $xr0, $s1, 32 ; LA64-NEXT: addi.d $s0, $s0, -1 ; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload ; LA64-NEXT: xvadd.h $xr1, $xr0, $xr1 @@ -473,7 +465,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: .LBB3_3: ; LA64-NEXT: xvrepli.b $xr0, 0 ; LA64-NEXT: .LBB3_4: # %for.cond.cleanup -; LA64-NEXT: xvst $xr0, $s1, 0 +; LA64-NEXT: xvst $xr0, $s1, 32 ; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload @@ -516,14 +508,13 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: move $s1, $a2 ; LA32-NEXT: slli.w $a1, $a0, 4 ; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 -; LA32-NEXT: add.w $a0, $a4, $a0 ; LA32-NEXT: sltui $a1, $a3, 1 ; LA32-NEXT: slti $a2, $a3, 0 ; LA32-NEXT: masknez $a2, $a2, $a1 ; LA32-NEXT: sltui $a3, $s1, 1 ; LA32-NEXT: maskeqz $a1, $a3, $a1 ; LA32-NEXT: or $a1, $a1, $a2 -; LA32-NEXT: addi.w $s2, $a0, 16 +; LA32-NEXT: add.w $s2, $a4, $a0 ; LA32-NEXT: bnez $a1, .LBB4_3 ; LA32-NEXT: # %bb.1: # %for.body.preheader ; LA32-NEXT: move $fp, $a4 @@ -536,7 +527,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl f -; LA32-NEXT: vldrepl.b $vr0, $s2, 0 +; LA32-NEXT: vldrepl.b $vr0, $s2, 16 ; LA32-NEXT: addi.w $s3, $s3, 1 ; LA32-NEXT: sltui $a0, $s3, 1 ; LA32-NEXT: add.w $s4, $s4, $a0 @@ -552,7 +543,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: .LBB4_3: ; LA32-NEXT: vrepli.b $vr0, 0 ; LA32-NEXT: .LBB4_4: # %for.cond.cleanup -; LA32-NEXT: vstelm.b $vr0, $s2, 0, 1 +; LA32-NEXT: vstelm.b $vr0, $s2, 16, 1 ; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload @@ -573,8 +564,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $s0, $a1 ; LA64-NEXT: slli.d $a1, $a0, 4 ; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: addi.d $s1, $a0, 16 +; LA64-NEXT: add.d $s1, $a2, $a0 ; LA64-NEXT: blez $s0, .LBB4_3 ; LA64-NEXT: # %bb.1: # %for.body.preheader ; LA64-NEXT: move $fp, $a2 @@ -586,7 +576,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(f) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: vldrepl.b $vr0, $s1, 0 +; LA64-NEXT: vldrepl.b $vr0, $s1, 16 ; LA64-NEXT: addi.d $s0, $s0, -1 ; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload ; LA64-NEXT: vadd.b $vr1, $vr0, $vr1 @@ -597,7 +587,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: .LBB4_3: ; LA64-NEXT: vrepli.b $vr0, 0 ; LA64-NEXT: .LBB4_4: # %for.cond.cleanup -; LA64-NEXT: vstelm.b $vr0, $s1, 0, 1 +; LA64-NEXT: vstelm.b $vr0, $s1, 16, 1 ; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload ; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload @@ -643,14 +633,13 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: move $s1, $a2 ; LA32-NEXT: slli.w $a1, $a0, 4 ; LA32-NEXT: alsl.w $a0, $a0, $a1, 3 -; LA32-NEXT: add.w $a0, $a4, $a0 ; LA32-NEXT: sltui $a1, $a3, 1 ; LA32-NEXT: slti $a2, $a3, 0 ; LA32-NEXT: masknez $a2, $a2, $a1 ; LA32-NEXT: sltui $a3, $s1, 1 ; LA32-NEXT: maskeqz $a1, $a3, $a1 ; LA32-NEXT: or $a1, $a1, $a2 -; LA32-NEXT: addi.w $s2, $a0, 8 +; LA32-NEXT: add.w $s2, $a4, $a0 ; LA32-NEXT: bnez $a1, .LBB5_3 ; LA32-NEXT: # %bb.1: # %for.body.preheader ; LA32-NEXT: move $fp, $a4 @@ -663,7 +652,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill ; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl f -; LA32-NEXT: xvldrepl.d $xr0, $s2, 0 +; LA32-NEXT: xvldrepl.d $xr0, $s2, 8 ; LA32-NEXT: addi.w $s3, $s3, 1 ; LA32-NEXT: sltui $a0, $s3, 1 ; LA32-NEXT: add.w $s4, $s4, $a0 @@ -679,7 +668,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind { ; LA32-NEXT: .LBB5_3: ; LA32-NEXT: xvrepli.b $xr0, 0 ; LA32-NEXT: .LBB5_4: # %for.cond.cleanup -; LA32-NEXT: xvstelm.d $xr0, $s2, 0, 1 +; LA32-NEXT: xvstelm.d $xr0, $s2, 8, 1 ; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload ; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload @@ -700,8 +689,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $s0, $a1 ; LA64-NEXT: slli.d $a1, $a0, 4 ; LA64-NEXT: alsl.d $a0, $a0, $a1, 3 -; LA64-NEXT: add.d $a0, $a2, $a0 -; LA64-NEXT: addi.d $s1, $a0, 8 +; LA64-NEXT: add.d $s1, $a2, $a0 ; LA64-NEXT: blez $s0, .LBB5_3 ; LA64-NEXT: # %bb.1: # %for.body.preheader ; LA64-NEXT: move $fp, $a2 @@ -713,7 +701,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: move $a0, $fp ; LA64-NEXT: pcaddu18i $ra, %call36(f) ; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: xvldrepl.d $xr0, $s1, 0 +; LA64-NEXT: xvldrepl.d $xr0, $s1, 8 ; LA64-NEXT: addi.d $s0, $s0, -1 ; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload ; LA64-NEXT: xvfadd.d $xr1, $xr0, $xr1 @@ -724,7 +712,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind { ; LA64-NEXT: .LBB5_3: ; LA64-NEXT: xvrepli.b $xr0, 0 ; LA64-NEXT: .LBB5_4: # %for.cond.cleanup -; LA64-NEXT: xvstelm.d $xr0, $s1, 0, 1 +; LA64-NEXT: xvstelm.d $xr0, $s1, 8, 1 ; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload ; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/LoongArch/stptr.ll b/llvm/test/CodeGen/LoongArch/stptr.ll index d70f9f4..23b433a 100644 --- a/llvm/test/CodeGen/LoongArch/stptr.ll +++ b/llvm/test/CodeGen/LoongArch/stptr.ll @@ -23,8 +23,7 @@ define void @stptr_w(ptr %p, i32 signext %val) nounwind { ; LA32-LABEL: stptr_w: ; LA32: # %bb.0: ; LA32-NEXT: addi.w $a0, $a0, 2047 -; LA32-NEXT: addi.w $a0, $a0, 1 -; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: st.w $a1, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: stptr_w: @@ -77,9 +76,8 @@ define void @stptr_d(ptr %p, i64 %val) nounwind { ; LA32-LABEL: stptr_d: ; LA32: # %bb.0: ; LA32-NEXT: addi.w $a0, $a0, 2047 -; LA32-NEXT: addi.w $a0, $a0, 1 -; LA32-NEXT: st.w $a2, $a0, 4 -; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: st.w $a2, $a0, 5 +; LA32-NEXT: st.w $a1, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: stptr_d: diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll index 50bd22b..f4964288 100644 --- a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll @@ -205,12 +205,19 @@ define i64 @addmul20(i64 %a, i64 %b) { } define i64 @addmul22(i64 %a, i64 %b) { -; CHECK-LABEL: addmul22: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 22 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: addmul22: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 22 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: addmul22: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a2, a0, a0, 2 +; RV64XTHEADBA-NEXT: th.addsl a0, a0, a2, 1 +; RV64XTHEADBA-NEXT: th.addsl a0, a1, a0, 1 +; RV64XTHEADBA-NEXT: ret %c = mul i64 %a, 22 %d = add i64 %c, %b ret i64 %d diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 7fd7626..d4b2288 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -585,6 +585,33 @@ define i64 @addmul12(i64 %a, i64 %b) { ret i64 %d } +define i64 @addmul14(i64 %a, i64 %b) { +; RV64I-LABEL: addmul14: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a2, a0, 1 +; RV64I-NEXT: slli a0, a0, 4 +; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul14: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh1add a2, a0, a0 +; RV64ZBA-NEXT: sh1add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul14: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret + %c = mul i64 %a, 14 + %d = add i64 %c, %b + ret i64 %d +} + define i64 @addmul18(i64 %a, i64 %b) { ; RV64I-LABEL: addmul18: ; RV64I: # %bb.0: @@ -636,12 +663,26 @@ define i64 @addmul20(i64 %a, i64 %b) { } define i64 @addmul22(i64 %a, i64 %b) { -; CHECK-LABEL: addmul22: -; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 22 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: addmul22: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 22 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul22: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh2add a2, a0, a0 +; RV64ZBA-NEXT: sh1add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul22: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret %c = mul i64 %a, 22 %d = add i64 %c, %b ret i64 %d @@ -672,6 +713,32 @@ define i64 @addmul24(i64 %a, i64 %b) { ret i64 %d } +define i64 @addmul26(i64 %a, i64 %b) { +; RV64I-LABEL: addmul26: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 26 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul26: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh1add a2, a0, a0 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul26: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.h a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret + %c = mul i64 %a, 26 + %d = add i64 %c, %b + ret i64 %d +} + define i64 @addmul36(i64 %a, i64 %b) { ; RV64I-LABEL: addmul36: ; RV64I: # %bb.0: @@ -722,6 +789,58 @@ define i64 @addmul40(i64 %a, i64 %b) { ret i64 %d } +define i64 @addmul38(i64 %a, i64 %b) { +; RV64I-LABEL: addmul38: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 38 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul38: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh3add a2, a0, a0 +; RV64ZBA-NEXT: sh1add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul38: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret + %c = mul i64 %a, 38 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @addmul42(i64 %a, i64 %b) { +; RV64I-LABEL: addmul42: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 42 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul42: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh2add a2, a0, a0 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul42: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret + %c = mul i64 %a, 42 + %d = add i64 %c, %b + ret i64 %d +} + define i64 @addmul72(i64 %a, i64 %b) { ; RV64I-LABEL: addmul72: ; RV64I: # %bb.0: @@ -747,6 +866,84 @@ define i64 @addmul72(i64 %a, i64 %b) { ret i64 %d } +define i64 @addmul74(i64 %a, i64 %b) { +; RV64I-LABEL: addmul74: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 74 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul74: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh3add a2, a0, a0 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul74: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.w a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret + %c = mul i64 %a, 74 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @addmul82(i64 %a, i64 %b) { +; RV64I-LABEL: addmul82: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 82 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul82: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh2add a2, a0, a0 +; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul82: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.w a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret + %c = mul i64 %a, 82 + %d = add i64 %c, %b + ret i64 %d +} + +define i64 @addmul146(i64 %a, i64 %b) { +; RV64I-LABEL: addmul146: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 146 +; RV64I-NEXT: mul a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: addmul146: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh3add a2, a0, a0 +; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret +; +; RV64XANDESPERF-LABEL: addmul146: +; RV64XANDESPERF: # %bb.0: +; RV64XANDESPERF-NEXT: nds.lea.d a2, a0, a0 +; RV64XANDESPERF-NEXT: nds.lea.d a0, a0, a2 +; RV64XANDESPERF-NEXT: nds.lea.h a0, a1, a0 +; RV64XANDESPERF-NEXT: ret + %c = mul i64 %a, 146 + %d = add i64 %c, %b + ret i64 %d +} + define i64 @mul50(i64 %a) { ; RV64I-LABEL: mul50: ; RV64I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll index d8e2b2c..305ab93 100644 --- a/llvm/test/CodeGen/RISCV/zicond-opts.ll +++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll @@ -263,3 +263,35 @@ define i64 @test_inv_and_eqz(i64 %f, i64 %x, i1 %cond) { %7 = and i64 %6, %f ret i64 %7 } + +define i32 @pr166596(i32 %conv.i, i1 %iszero) #0 { +; RV32ZICOND-LABEL: pr166596: +; RV32ZICOND: # %bb.0: # %entry +; RV32ZICOND-NEXT: andi a1, a1, 1 +; RV32ZICOND-NEXT: xori a0, a0, 1 +; RV32ZICOND-NEXT: zext.h a0, a0 +; RV32ZICOND-NEXT: clz a0, a0 +; RV32ZICOND-NEXT: addi a0, a0, 41 +; RV32ZICOND-NEXT: czero.nez a0, a0, a1 +; RV32ZICOND-NEXT: addi a0, a0, -9 +; RV32ZICOND-NEXT: ret +; +; RV64ZICOND-LABEL: pr166596: +; RV64ZICOND: # %bb.0: # %entry +; RV64ZICOND-NEXT: andi a1, a1, 1 +; RV64ZICOND-NEXT: xori a0, a0, 1 +; RV64ZICOND-NEXT: zext.h a0, a0 +; RV64ZICOND-NEXT: clz a0, a0 +; RV64ZICOND-NEXT: addi a0, a0, 9 +; RV64ZICOND-NEXT: czero.nez a0, a0, a1 +; RV64ZICOND-NEXT: addi a0, a0, -9 +; RV64ZICOND-NEXT: ret +entry: + %not.i = xor i32 %conv.i, 1 + %conv2.i = trunc i32 %not.i to i16 + %conv22 = zext i16 %conv2.i to i64 + %0 = call i64 @llvm.ctlz.i64(i64 %conv22, i1 false) + %cast = trunc i64 %0 to i32 + %clzg = select i1 %iszero, i32 -9, i32 %cast + ret i32 %clzg +} diff --git a/llvm/test/CodeGen/SPIRV/allow_unknown_intrinsics.ll b/llvm/test/CodeGen/SPIRV/allow_unknown_intrinsics.ll new file mode 100644 index 0000000..677291a --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/allow_unknown_intrinsics.ll @@ -0,0 +1,36 @@ +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck -check-prefix=CHECK-ERROR %s +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics %s -o %t.spvt 2>&1 | FileCheck -check-prefix=CHECK-ERROR %s +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=notllvm %s -o %t.spvt 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s +; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=llvm.some.custom %s -o %t.spvt 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=llvm. %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=llvm.,random.prefix %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-amd-amdhsa %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spv-allow-unknown-intrinsics=llvm. %s -o - -filetype=obj | spirv-val %} +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-amd-amdhsa %s -o - -filetype=obj | spirv-val %} + +; The test checks command-line option which allows to represent unknown +; intrinsics as external function calls in SPIR-V. + +; CHECK-ERROR: LLVM ERROR: unable to legalize instruction: %3:iid(s64) = G_READCYCLECOUNTER (in function: foo) + +; CHECK: Name %[[READCYCLECOUNTER:[0-9]+]] "spirv.llvm_readcyclecounter" +; CHECK: Name %[[SOME_CUSTOM_INTRINSIC:[0-9]+]] "spirv.llvm_some_custom_intrinsic" +; CHECK-DAG: Decorate %[[READCYCLECOUNTER]] LinkageAttributes {{.*}} Import +; CHECK: Decorate %[[SOME_CUSTOM_INTRINSIC]] LinkageAttributes {{.*}} Import +; CHECK-DAG: %[[I64:[0-9]+]] = OpTypeInt 64 +; CHECK: %[[FnTy:[0-9]+]] = OpTypeFunction %[[I64]] +; CHECK: %[[READCYCLECOUNTER]] = OpFunction %[[I64]] {{.*}} %[[FnTy]] +; CHECK-DAG: %[[SOME_CUSTOM_INTRINSIC]] = OpFunction %[[I64]] {{.*}} %[[FnTy]] +; CHECK-DAG: OpFunctionCall %[[I64]] %[[READCYCLECOUNTER]] +; CHECK: OpFunctionCall %[[I64]] %[[SOME_CUSTOM_INTRINSIC]] + +define spir_func void @foo() { +entry: +; TODO: if and when the SPIR-V learns how to lower readcyclecounter, we will have to pick another unhandled intrinsic + %0 = call i64 @llvm.readcyclecounter() + %1 = call i64 @llvm.some.custom.intrinsic() + ret void +} + +declare i64 @llvm.readcyclecounter() +declare i64 @llvm.some.custom.intrinsic() diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_blocking_pipes/PipeBlocking.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_blocking_pipes/PipeBlocking.ll new file mode 100644 index 0000000..f6b6115 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_ALTERA_blocking_pipes/PipeBlocking.ll @@ -0,0 +1,98 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_ALTERA_blocking_pipes %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV +; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_ALTERA_blocking_pipes %s -o - -filetype=obj | spirv-val %} + +%opencl.pipe_ro_t = type opaque +%opencl.pipe_wo_t = type opaque + +; CHECK-SPIRV: OpCapability BlockingPipesALTERA +; CHECK-SPIRV: OpExtension "SPV_ALTERA_blocking_pipes" +; CHECK-SPIRV: %[[PipeRTy:[0-9]+]] = OpTypePipe ReadOnly +; CHECK-SPIRV: %[[PipeWTy:[0-9]+]] = OpTypePipe WriteOnly +; CHECK-SPIRV: %[[PipeR1:[0-9]+]] = OpLoad %[[PipeRTy]] %[[#]] Aligned 8 +; CHECK-SPIRV: OpReadPipeBlockingALTERA %[[PipeR1]] %[[#]] %[[#]] %[[#]] +; CHECK-SPIRV: %[[PipeR2:[0-9]+]] = OpLoad %[[PipeRTy]] %[[#]] Aligned 8 +; CHECK-SPIRV: OpReadPipeBlockingALTERA %[[PipeR2]] %[[#]] %[[#]] %[[#]] +; CHECK-SPIRV: %[[PipeW1:[0-9]+]] = OpLoad %[[PipeWTy]] %[[#]] Aligned 8 +; CHECK-SPIRV: OpWritePipeBlockingALTERA %[[PipeW1]] %[[#]] %[[#]] %[[#]] +; CHECK-SPIRV: %[[PipeW2:[0-9]+]] = OpLoad %[[PipeWTy]] %[[#]] Aligned 8 +; CHECK-SPIRV: OpWritePipeBlockingALTERA %[[PipeW2]] %[[#]] %[[#]] %[[#]] + +define spir_func void @foo(target("spirv.Pipe", 0) %p, ptr addrspace(1) %ptr) { +entry: + %p.addr = alloca target("spirv.Pipe", 0), align 8 + %ptr.addr = alloca ptr addrspace(1), align 8 + store target("spirv.Pipe", 0) %p, target("spirv.Pipe", 0)* %p.addr, align 8 + store ptr addrspace(1) %ptr, ptr %ptr.addr, align 8 + %0 = load target("spirv.Pipe", 0), target("spirv.Pipe", 0)* %p.addr, align 8 + %1 = load ptr addrspace(1), ptr %ptr.addr, align 8 + %2 = addrspacecast ptr addrspace(1) %1 to ptr addrspace(4) + call spir_func void @_Z29__spirv_ReadPipeBlockingINTELIiEv8ocl_pipePiii(target("spirv.Pipe", 0) %0, ptr addrspace(4) %2, i32 4, i32 4) + ret void +} + +declare dso_local spir_func void @_Z29__spirv_ReadPipeBlockingINTELIiEv8ocl_pipePiii(target("spirv.Pipe", 0), ptr addrspace(4), i32, i32) + +define spir_func void @bar(target("spirv.Pipe", 0) %p, ptr addrspace(1) %ptr) { +entry: + %p.addr = alloca target("spirv.Pipe", 0), align 8 + %ptr.addr = alloca ptr addrspace(1), align 8 + store target("spirv.Pipe", 0) %p, target("spirv.Pipe", 0)* %p.addr, align 8 + store ptr addrspace(1) %ptr, ptr %ptr.addr, align 8 + %0 = load target("spirv.Pipe", 0), target("spirv.Pipe", 0)* %p.addr, align 8 + %1 = load ptr addrspace(1), ptr %ptr.addr, align 8 + %2 = addrspacecast ptr addrspace(1) %1 to ptr addrspace(4) + call spir_func void @_Z29__spirv_ReadPipeBlockingINTELIiEv8ocl_pipePvii(target("spirv.Pipe", 0) %0, ptr addrspace(4) %2, i32 4, i32 4) + ret void +} + +declare dso_local spir_func void @_Z29__spirv_ReadPipeBlockingINTELIiEv8ocl_pipePvii(target("spirv.Pipe", 0), ptr addrspace(4), i32, i32) + +define spir_func void @boo(target("spirv.Pipe", 1) %p, ptr addrspace(1) %ptr) { +entry: + %p.addr = alloca target("spirv.Pipe", 1), align 8 + %ptr.addr = alloca ptr addrspace(1), align 8 + store target("spirv.Pipe", 1) %p, target("spirv.Pipe", 1)* %p.addr, align 8 + store ptr addrspace(1) %ptr, ptr %ptr.addr, align 8 + %0 = load target("spirv.Pipe", 1), target("spirv.Pipe", 1)* %p.addr, align 8 + %1 = load ptr addrspace(1), ptr %ptr.addr, align 8 + %2 = addrspacecast ptr addrspace(1) %1 to ptr addrspace(4) + call spir_func void @_Z30__spirv_WritePipeBlockingINTELIKiEv8ocl_pipePiii(target("spirv.Pipe", 1) %0, ptr addrspace(4) %2, i32 4, i32 4) + ret void +} + +declare dso_local spir_func void @_Z30__spirv_WritePipeBlockingINTELIKiEv8ocl_pipePiii(target("spirv.Pipe", 1), ptr addrspace(4), i32, i32) + +define spir_func void @baz(target("spirv.Pipe", 1) %p, ptr addrspace(1) %ptr) { +entry: + %p.addr = alloca target("spirv.Pipe", 1), align 8 + %ptr.addr = alloca ptr addrspace(1), align 8 + store target("spirv.Pipe", 1) %p, target("spirv.Pipe", 1)* %p.addr, align 8 + store ptr addrspace(1) %ptr, ptr %ptr.addr, align 8 + %0 = load target("spirv.Pipe", 1), target("spirv.Pipe", 1)* %p.addr, align 8 + %1 = load ptr addrspace(1), ptr %ptr.addr, align 8 + %2 = addrspacecast ptr addrspace(1) %1 to ptr addrspace(4) + call spir_func void @_Z30__spirv_WritePipeBlockingINTELIKiEv8ocl_pipePvii(target("spirv.Pipe", 1) %0, ptr addrspace(4) %2, i32 4, i32 4) + ret void +} + +declare dso_local spir_func void @_Z30__spirv_WritePipeBlockingINTELIKiEv8ocl_pipePvii(target("spirv.Pipe", 1), ptr addrspace(4), i32, i32) + +; CHECK-LLVM: declare spir_func void @__read_pipe_2_bl(ptr addrspace(1), ptr addrspace(4), i32, i32) +; CHECK-LLVM: declare spir_func void @__write_pipe_2_bl(ptr addrspace(1), ptr addrspace(4), i32, i32) + +define linkonce_odr dso_local spir_func void @WritePipeBLockingi9Pointer(ptr addrspace(4) align 2 dereferenceable(2) %_Data) { +entry: + %_Data.addr = alloca ptr addrspace(4), align 8 + %_WPipe = alloca target("spirv.Pipe", 1), align 8 + %_Data.addr.ascast = addrspacecast ptr %_Data.addr to ptr addrspace(4) + %_WPipe.ascast = addrspacecast target("spirv.Pipe", 1)* %_WPipe to target("spirv.Pipe", 1) addrspace(4)* + store ptr addrspace(4) %_Data, ptr addrspace(4) %_Data.addr.ascast, align 8 + %0 = bitcast target("spirv.Pipe", 1)* %_WPipe to ptr + %1 = load target("spirv.Pipe", 1), target("spirv.Pipe", 1) addrspace(4)* %_WPipe.ascast, align 8 + %2 = load ptr addrspace(4), ptr addrspace(4) %_Data.addr.ascast, align 8 + call spir_func void @_Z30__spirv_WritePipeBlockingINTELIDU9_Ev8ocl_pipePKT_ii(target("spirv.Pipe", 1) %1, ptr addrspace(4) %2, i32 2, i32 2) + ret void +} + +declare dso_local spir_func void @_Z30__spirv_WritePipeBlockingINTELIDU9_Ev8ocl_pipePKT_ii(target("spirv.Pipe", 1), ptr addrspace(4), i32, i32) +
\ No newline at end of file diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll index a8d37be..c44b3bb 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -2808,6 +2808,348 @@ entry: ret <4 x i32> %spec.store.select7 } +define <2 x i8> @fptosi_v2f32_v2i8(<2 x float> %x) { +; CHECK-LABEL: fptosi_v2f32_v2i8: +; CHECK: .functype fptosi_v2f32_v2i8 (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: v128.const 255, 255, 255, 255 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: i8x16.narrow_i16x8_u +; CHECK-NEXT: # fallthrough-return + %conv = fptosi <2 x float> %x to <2 x i8> + ret <2 x i8> %conv +} + +define <2 x i8> @fptoui_v2f32_v2i8(<2 x float> %x) { +; CHECK-LABEL: fptoui_v2f32_v2i8: +; CHECK: .functype fptoui_v2f32_v2i8 (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: v128.const 255, 255, 255, 255 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: i8x16.narrow_i16x8_u +; CHECK-NEXT: # fallthrough-return + %conv = fptoui <2 x float> %x to <2 x i8> + ret <2 x i8> %conv +} + +define <2 x i16> @fptosi_v2f32_v2i16(<2 x float> %x) { +; CHECK-LABEL: fptosi_v2f32_v2i16: +; CHECK: .functype fptosi_v2f32_v2i16 (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: # fallthrough-return + %conv = fptosi <2 x float> %x to <2 x i16> + ret <2 x i16> %conv +} + +define <2 x i16> @fptoui_v2f32_v2i16(<2 x float> %x) { +; CHECK-LABEL: fptoui_v2f32_v2i16: +; CHECK: .functype fptoui_v2f32_v2i16 (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: # fallthrough-return + %conv = fptoui <2 x float> %x to <2 x i16> + ret <2 x i16> %conv +} + +define <4 x i8> @fptosi_v4f32_v4i8(<4 x float> %x) { +; CHECK-LABEL: fptosi_v4f32_v4i8: +; CHECK: .functype fptosi_v4f32_v4i8 (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: v128.const 255, 255, 255, 255 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: i8x16.narrow_i16x8_u +; CHECK-NEXT: # fallthrough-return + %conv = fptosi <4 x float> %x to <4 x i8> + ret <4 x i8> %conv +} + +define <4 x i8> @fptoui_v4f32_v4i8(<4 x float> %x) { +; CHECK-LABEL: fptoui_v4f32_v4i8: +; CHECK: .functype fptoui_v4f32_v4i8 (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: v128.const 255, 255, 255, 255 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: i8x16.narrow_i16x8_u +; CHECK-NEXT: # fallthrough-return + %conv = fptoui <4 x float> %x to <4 x i8> + ret <4 x i8> %conv +} + +define <4 x i16> @fptosi_v4f32_v4i16(<4 x float> %x) { +; CHECK-LABEL: fptosi_v4f32_v4i16: +; CHECK: .functype fptosi_v4f32_v4i16 (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: # fallthrough-return + %conv = fptosi <4 x float> %x to <4 x i16> + ret <4 x i16> %conv +} + +define <4 x i16> @fptoui_v4f32_v4i16(<4 x float> %x) { +; CHECK-LABEL: fptoui_v4f32_v4i16: +; CHECK: .functype fptoui_v4f32_v4i16 (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: # fallthrough-return + %conv = fptoui <4 x float> %x to <4 x i16> + ret <4 x i16> %conv +} + +define <8 x i8> @fptosi_v8f32_v8i8(<8 x float> %x) { +; CHECK-LABEL: fptosi_v8f32_v8i8: +; CHECK: .functype fptosi_v8f32_v8i8 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: v128.const 255, 255, 255, 255 +; CHECK-NEXT: local.tee 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: i8x16.narrow_i16x8_u +; CHECK-NEXT: # fallthrough-return + %conv = fptosi <8 x float> %x to <8 x i8> + ret <8 x i8> %conv +} + +define <8 x i8> @fptoui_v8f32_v8i8(<8 x float> %x) { +; CHECK-LABEL: fptoui_v8f32_v8i8: +; CHECK: .functype fptoui_v8f32_v8i8 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: v128.const 255, 255, 255, 255 +; CHECK-NEXT: local.tee 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: i8x16.narrow_i16x8_u +; CHECK-NEXT: # fallthrough-return + %conv = fptoui <8 x float> %x to <8 x i8> + ret <8 x i8> %conv +} + +define <8 x i16> @fptosi_v8f32_v8i16(<8 x float> %x) { +; CHECK-LABEL: fptosi_v8f32_v8i16: +; CHECK: .functype fptosi_v8f32_v8i16 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: local.tee 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: # fallthrough-return + %conv = fptosi <8 x float> %x to <8 x i16> + ret <8 x i16> %conv +} + +define <8 x i16> @fptoui_v8f32_v8i16(<8 x float> %x) { +; CHECK-LABEL: fptoui_v8f32_v8i16: +; CHECK: .functype fptoui_v8f32_v8i16 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: local.tee 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: # fallthrough-return + %conv = fptoui <8 x float> %x to <8 x i16> + ret <8 x i16> %conv +} + +define <16 x i8> @fptosi_v16f32_v16i8(<16 x float> %x) { +; CHECK-LABEL: fptosi_v16f32_v16i8: +; CHECK: .functype fptosi_v16f32_v16i8 (v128, v128, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: v128.const 255, 255, 255, 255 +; CHECK-NEXT: local.tee 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: i8x16.narrow_i16x8_u +; CHECK-NEXT: # fallthrough-return + %conv = fptosi <16 x float> %x to <16 x i8> + ret <16 x i8> %conv +} + +define <16 x i8> @fptoui_v16f32_v16i8(<16 x float> %x) { +; CHECK-LABEL: fptoui_v16f32_v16i8: +; CHECK: .functype fptoui_v16f32_v16i8 (v128, v128, v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: v128.const 255, 255, 255, 255 +; CHECK-NEXT: local.tee 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.and +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: i8x16.narrow_i16x8_u +; CHECK-NEXT: # fallthrough-return + %conv = fptoui <16 x float> %x to <16 x i8> + ret <16 x i8> %conv +} + +define <16 x i16> @fptosi_v16f32_v16i16(<16 x float> %x) { +; CHECK-LABEL: fptosi_v16f32_v16i16: +; CHECK: .functype fptosi_v16f32_v16i16 (i32, v128, v128, v128, v128) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: local.tee 3 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: v128.and +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: v128.and +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return + %conv = fptosi <16 x float> %x to <16 x i16> + ret <16 x i16> %conv +} + +define <16 x i16> @fptoui_v16f32_v16i16(<16 x float> %x) { +; CHECK-LABEL: fptoui_v16f32_v16i16: +; CHECK: .functype fptoui_v16f32_v16i16 (i32, v128, v128, v128, v128) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: local.tee 3 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: v128.and +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: v128.store 16 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: v128.and +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_u +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: v128.and +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: v128.store 0 +; CHECK-NEXT: # fallthrough-return + %conv = fptoui <16 x float> %x to <16 x i16> + ret <16 x i16> %conv +} + declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) diff --git a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll index 5eb49fd..404db23 100644 --- a/llvm/test/CodeGen/WebAssembly/memory-interleave.ll +++ b/llvm/test/CodeGen/WebAssembly/memory-interleave.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=wasm32 -mattr=+simd128 -passes=loop-vectorize %s | llc -mtriple=wasm32 -mattr=+simd128 -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s +; RUN: opt -mtriple=wasm32 -mattr=+simd128 -passes=loop-vectorize %s | llc -mtriple=wasm32 -mattr=+simd128 -asm-verbose=false -disable-wasm-fallthrough-return-opt | FileCheck %s target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20" @@ -20,17 +20,17 @@ target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20 ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 ; CHECK: i32x4.add -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 ; CHECK: i32x4.add -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 ; CHECK: v128.store define hidden void @two_ints_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -64,17 +64,17 @@ define hidden void @two_ints_same_op(ptr noalias nocapture noundef writeonly %0, ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 ; CHECK: i32x4.add -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 ; CHECK: i32x4.sub -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 ; CHECK: v128.store define hidden void @two_ints_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -208,27 +208,27 @@ define hidden void @three_shorts(ptr noalias nocapture noundef writeonly %0, ptr ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i16x8.sub -; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i16x8.sub -; CHECK: i8x16.shuffle {{.*}} 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i16x8.sub -; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i16x8.sub -; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31 ; CHECK: v128.store define hidden void @four_shorts_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -276,27 +276,27 @@ define hidden void @four_shorts_same_op(ptr noalias nocapture noundef writeonly ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: v128.or -; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: v128.or -; CHECK: i8x16.shuffle {{.*}} 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: v128.xor -; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: v128.xor -; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31 ; CHECK: v128.store define hidden void @four_shorts_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -343,27 +343,27 @@ define hidden void @four_shorts_split_op(ptr noalias nocapture noundef writeonly ; CHECK-LABEL: four_shorts_interleave_op: ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: v128.or -; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: v128.xor -; CHECK: i8x16.shuffle {{.*}} 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: v128.or -; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: v128.xor -; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 0, 1, 4, 5, 20, 21, 0, 1, 0, 1, 6, 7, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 0, 1, 0, 1, 16, 17, 0, 1, 0, 1, 2, 3, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31 ; CHECK: v128.store define hidden void @four_shorts_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -483,19 +483,19 @@ define hidden void @five_shorts(ptr noalias nocapture noundef writeonly %0, ptr ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; CHECK: i8x16.shuffle 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; CHECK: i8x16.shuffle 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 ; CHECK: i16x8.extmul_high_i8x16_u -; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 -; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 +; CHECK: i8x16.shuffle 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 +; CHECK: i8x16.shuffle 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 ; CHECK: i16x8.extmul_high_i8x16_u -; CHECK: i8x16.shuffle {{.*}} 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 +; CHECK: i8x16.shuffle 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 ; CHECK: v128.store ; CHECK: i16x8.extmul_low_i8x16_u ; CHECK: i16x8.extmul_low_i8x16_u -; CHECK: i8x16.shuffle {{.*}} 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 +; CHECK: i8x16.shuffle 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 ; CHECK: v128.store define hidden void @two_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -529,18 +529,18 @@ define hidden void @two_bytes_same_op(ptr noalias nocapture noundef writeonly %0 ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; CHECK: i8x16.shuffle 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; CHECK: i8x16.shuffle 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 ; CHECK: i16x8.extmul_high_i8x16_u -; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 -; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 +; CHECK: i8x16.shuffle 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 +; CHECK: i8x16.shuffle 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 ; CHECK: i8x16.sub -; CHECK: i8x16.shuffle {{.*}} 0, 24, 2, 25, 4, 26, 6, 27, 8, 28, 10, 29, 12, 30, 14, 31 +; CHECK: i8x16.shuffle 0, 24, 2, 25, 4, 26, 6, 27, 8, 28, 10, 29, 12, 30, 14, 31 ; CHECK: v128.store ; CHECK: i16x8.extmul_low_i8x16_u -; CHECK: i8x16.shuffle {{.*}} 0, 16, 2, 17, 4, 18, 6, 19, 8, 20, 10, 21, 12, 22, 14, 23 +; CHECK: i8x16.shuffle 0, 16, 2, 17, 4, 18, 6, 19, 8, 20, 10, 21, 12, 22, 14, 23 ; CHECK: v128.store define hidden void @two_bytes_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -672,27 +672,27 @@ define hidden void @three_bytes_interleave_op(ptr noalias nocapture noundef writ ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: v128.and -; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: v128.and -; CHECK: i8x16.shuffle {{.*}} 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23, 0, 0 -; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23, 0, 0 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: v128.and -; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: v128.and -; CHECK: i8x16.shuffle {{.*}} 0, 0, 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31 +; CHECK: i8x16.shuffle 0, 0, 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23 +; CHECK: i8x16.shuffle 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19, 0, 0 -; CHECK: i8x16.shuffle {{.*}} 0, 0, 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31 +; CHECK: i8x16.shuffle 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19, 0, 0 +; CHECK: i8x16.shuffle 0, 0, 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19 +; CHECK: i8x16.shuffle 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31 ; CHECK: v128.store define hidden void @four_bytes_same_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -740,25 +740,25 @@ define hidden void @four_bytes_same_op(ptr noalias nocapture noundef writeonly % ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}}, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}}, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extmul_low_i8x16_u -; CHECK: i8x16.shuffle {{.*}}, 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: i8x16.shuffle {{.*}}, 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extmul_low_i8x16_u -; CHECK: i8x16.shuffle {{.*}}, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 -; CHECK: i8x16.shuffle {{.*}}, 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: i8x16.shuffle {{.*}}, 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i8x16.sub -; CHECK: i8x16.shuffle {{.*}}, 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: i8x16.shuffle {{.*}}, 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i8x16.sub -; CHECK: i8x16.shuffle {{.*}}, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 -; CHECK: i8x16.shuffle {{.*}}, 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}}, 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 +; CHECK: i8x16.shuffle 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 ; CHECK: v128.store define hidden void @four_bytes_split_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -806,27 +806,27 @@ define hidden void @four_bytes_split_op(ptr noalias nocapture noundef writeonly ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i8x16.add -; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i8x16.sub -; CHECK: i8x16.shuffle {{.*}} 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23, 0, 0 -; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23, 0, 0 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i8x16.add -; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i8x16.sub -; CHECK: i8x16.shuffle {{.*}} 0, 0, 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31 +; CHECK: i8x16.shuffle 0, 0, 4, 20, 0, 0, 5, 21, 0, 0, 6, 22, 0, 0, 7, 23 +; CHECK: i8x16.shuffle 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19, 0, 0 -; CHECK: i8x16.shuffle {{.*}} 0, 0, 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31 +; CHECK: i8x16.shuffle 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19, 0, 0 +; CHECK: i8x16.shuffle 0, 0, 0, 16, 0, 0, 1, 17, 0, 0, 2, 18, 0, 0, 3, 19 +; CHECK: i8x16.shuffle 0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31 ; CHECK: v128.store define hidden void @four_bytes_interleave_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -1272,45 +1272,45 @@ define hidden void @four_bytes_into_four_ints_same_op(ptr noalias nocapture noun ; CHECK-LABEL: four_bytes_into_four_ints_vary_op: ; CHECK: loop ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u ; CHECK: i32x4.extend_low_i16x8_u ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u ; CHECK: i32x4.extend_low_i16x8_u ; CHECK: i32x4.add -; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u ; CHECK: i32x4.extend_low_i16x8_u -; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u ; CHECK: i32x4.extend_low_i16x8_u ; CHECK: i32x4.sub -; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u -; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u ; CHECK: i32x4.extmul_low_i16x8_u ; CHECK: v128.and -; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u ; CHECK: i32x4.extend_low_i16x8_u -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store define hidden void @four_bytes_into_four_ints_vary_op(ptr noalias nocapture noundef writeonly %0, ptr nocapture noundef readonly %1, ptr nocapture noundef readonly %2, i32 noundef %3) { %5 = icmp eq i32 %3, 0 @@ -1365,7 +1365,7 @@ define hidden void @four_bytes_into_four_ints_vary_op(ptr noalias nocapture noun ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 +; CHECK: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 ; CHECK: v128.store define hidden void @scale_uv_row_down2(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) { %5 = icmp sgt i32 %3, 0 @@ -1396,35 +1396,35 @@ define hidden void @scale_uv_row_down2(ptr nocapture noundef readonly %0, i32 no ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u -; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u ; CHECK: i16x8.add ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u ; CHECK: i16x8.add -; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u ; CHECK: i16x8.add ; CHECK: i16x8.add ; CHECK: i16x8.shr_u -; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u -; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u ; CHECK: i16x8.add -; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u ; CHECK: i16x8.add -; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_u ; CHECK: i16x8.add ; CHECK: i16x8.add ; CHECK: i16x8.shr_u -; CHECK: i8x16.shuffle {{.*}} 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 +; CHECK: i8x16.shuffle 0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30 ; CHECK: v128.store define hidden void @scale_uv_row_down2_box(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) { %5 = icmp sgt i32 %3, 0 @@ -1492,13 +1492,13 @@ define hidden void @scale_uv_row_down2_box(ptr nocapture noundef readonly %0, i3 ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 18, 22, 26, 30, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i8x16.avgr_u -; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 17, 21, 25, 29, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 3, 7, 11, 15, 19, 23, 27, 31, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i8x16.avgr_u -; CHECK: i8x16.shuffle {{.*}} 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 +; CHECK: i8x16.shuffle 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 ; CHECK: v128.store define hidden void @scale_uv_row_down2_linear(ptr nocapture noundef readonly %0, i32 noundef %1, ptr nocapture noundef writeonly %2, i32 noundef %3) { %5 = icmp sgt i32 %3, 0 @@ -1605,28 +1605,28 @@ for.body: ; preds = %entry, %for.body ; CHECK-LABEL: two_bytes_two_floats_same_op: ; CHECK: loop ; CHECK: v128.load64_zero -; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: v128.load64_zero -; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.mul -; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.mul -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 ; CHECK: v128.store define hidden void @two_bytes_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1663,28 +1663,28 @@ for.body: ; preds = %entry, %for.body ; CHECK-LABEL: two_bytes_two_floats_vary_op: ; CHECK: v128.load64_zero -; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: v128.load64_zero -; CHECK: i8x16.shuffle {{.*}} 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 2, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.add -; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 3, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.sub -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 ; CHECK: v128.store define hidden void @two_bytes_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1723,38 +1723,24 @@ for.body: ; preds = %entry, %for.body ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 ; CHECK: f32x4.mul -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.splat -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.const 255, 255, 255, 255 +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.narrow_i16x8_u +; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 ; CHECK: f32x4.mul -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.narrow_i16x8_u +; CHECK: i8x16.shuffle 0, 16, 1, 17, 2, 18, 3, 19, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: v128.store64_lane define hidden void @two_floats_two_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1791,38 +1777,24 @@ for.body: ; preds = %entry, %for.body ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 ; CHECK: f32x4.add -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.splat -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.const 255, 255, 255, 255 +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.narrow_i16x8_u +; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 ; CHECK: f32x4.sub -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.narrow_i16x8_u +; CHECK: i8x16.shuffle 0, 16, 1, 17, 2, 18, 3, 19, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: v128.store64_lane define hidden void @two_floats_two_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1858,24 +1830,24 @@ for.body: ; preds = %entry, %for.body ; CHECK-LABEL: two_shorts_two_floats_same_op: ; CHECK: loop ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.mul -; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.mul -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 ; CHECK: v128.store define hidden void @two_shorts_two_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1913,24 +1885,24 @@ for.body: ; preds = %entry, %for.body ; CHECK-LABEL: two_shorts_two_floats_vary_op: ; CHECK: loop ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.add -; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 2, 3, 6, 7, 10, 11, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.sub -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 ; CHECK: v128.store define hidden void @two_shorts_two_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -1969,38 +1941,22 @@ for.body: ; preds = %entry, %for.body ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 ; CHECK: f32x4.mul -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.splat -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.const 65535, 65535, 65535, 65535 +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 ; CHECK: f32x4.mul -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.shuffle 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 ; CHECK: v128.store define hidden void @two_floats_two_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -2037,38 +1993,22 @@ for.body: ; preds = %entry, %for.body ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 ; CHECK: f32x4.add -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.splat -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.const 65535, 65535, 65535, 65535 +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 ; CHECK: f32x4.sub -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.shuffle 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 ; CHECK: v128.store define hidden void @two_floats_two_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -2195,58 +2135,58 @@ for.body: ; preds = %entry, %for.body ; CHECK-LABEL: four_bytes_four_floats_same_op: ; CHECK: loop ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.mul -; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.mul -; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.mul -; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.mul -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store define hidden void @four_bytes_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -2302,58 +2242,58 @@ for.body: ; preds = %entry, %for.body ; CHECK-LABEL: four_bytes_four_floats_vary_op: ; CHECK: loop ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.mul -; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 1, 5, 9, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.add -; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 2, 6, 10, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.div -; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 3, 7, 11, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ; CHECK: i16x8.extend_low_i8x16_s ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.sub -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store define hidden void @four_bytes_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -2410,88 +2350,60 @@ for.body: ; preds = %entry, %for.body ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.mul -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.splat -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.const 255, 255, 255, 255 +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.narrow_i16x8_u +; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.mul -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.narrow_i16x8_u +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.mul -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.narrow_i16x8_u +; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.mul -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.narrow_i16x8_u +; CHECK: i8x16.shuffle 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle 0, 4, 24, 28, 1, 5, 25, 29, 2, 6, 26, 30, 3, 7, 27, 31 ; CHECK: v128.store define hidden void @four_floats_four_bytes_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -2544,88 +2456,60 @@ for.body: ; preds = %entry, %for.body ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.mul -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.splat -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.const 255, 255, 255, 255 +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.narrow_i16x8_u +; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.add -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.narrow_i16x8_u +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.div -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.narrow_i16x8_u +; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.sub -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i8x16.replace_lane +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.narrow_i16x8_u +; CHECK: i8x16.shuffle 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle 0, 4, 24, 28, 1, 5, 25, 29, 2, 6, 26, 30, 3, 7, 27, 31 ; CHECK: v128.store define hidden void @four_floats_four_bytes_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -2678,51 +2562,51 @@ for.body: ; preds = %entry, %for.body ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.mul -; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.mul -; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.mul -; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.mul -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store define hidden void @four_shorts_four_floats_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -2779,47 +2663,47 @@ for.body: ; preds = %entry, %for.body ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.mul -; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 2, 3, 10, 11, 18, 19, 26, 27, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.add -; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.div -; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s -; CHECK: i8x16.shuffle {{.*}} 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 +; CHECK: i8x16.shuffle 6, 7, 14, 15, 22, 23, 30, 31, 0, 1, 0, 1, 0, 1, 0, 1 ; CHECK: i32x4.extend_low_i16x8_s ; CHECK: f32x4.convert_i32x4_s ; CHECK: f32x4.sub -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.store define hidden void @four_shorts_four_floats_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -2876,89 +2760,58 @@ for.body: ; preds = %entry, %for.body ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.mul -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.splat -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.const 65535, 65535, 65535, 65535 +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.mul -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.mul -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.mul -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31 ; CHECK: v128.store -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.splat -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane +; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27 ; CHECK: v128.store define hidden void @four_floats_four_shorts_same_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: @@ -3011,89 +2864,58 @@ for.body: ; preds = %entry, %for.body ; CHECK: loop ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 2, 3, 0, 1, 2, 3 ; CHECK: v128.load ; CHECK: v128.load -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.mul -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.splat -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.const 65535, 65535, 65535, 65535 +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 4, 5, 6, 7, 20, 21, 22, 23, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.add -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 8, 9, 10, 11, 24, 25, 26, 27, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 24, 25, 26, 27 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.div -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 -; CHECK: i8x16.shuffle {{.*}} 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 12, 13, 14, 15, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 0, 1, 2, 3, 12, 13, 14, 15, 28, 29, 30, 31 +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 ; CHECK: f32x4.sub -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane +; CHECK: i32x4.trunc_sat_f32x4_s +; CHECK: v128.and +; CHECK: i16x8.narrow_i32x4_u +; CHECK: i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 +; CHECK: i8x16.shuffle 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31 ; CHECK: v128.store -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.splat -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane -; CHECK: f32x4.extract_lane -; CHECK: i32.trunc_sat_f32_s -; CHECK: i16x8.replace_lane +; CHECK: i8x16.shuffle 0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27 ; CHECK: v128.store define hidden void @four_floats_four_shorts_vary_op(ptr noundef readonly captures(none) %a, ptr noundef readonly captures(none) %b, ptr noundef writeonly captures(none) %res, i32 noundef %N) { entry: diff --git a/llvm/test/CodeGen/X86/pr166744.ll b/llvm/test/CodeGen/X86/pr166744.ll new file mode 100644 index 0000000..de2a5e6 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr166744.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=POSTRA +; RUN: llc < %s -mtriple=x86_64-- -mcpu=haswell | FileCheck %s --check-prefixes=NOPOSTRA +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=NOPOSTRA + +; FIXME: Ensure reloads are after narrowed i512 -> i32 store +define i1 @PR166744(ptr %v, i64 %idx, i1 zeroext %b) { +; POSTRA-LABEL: PR166744: +; POSTRA: # %bb.0: +; POSTRA-NEXT: movl $1029, %eax # imm = 0x405 +; POSTRA-NEXT: shlxl %esi, %edx, %edx +; POSTRA-NEXT: bextrl %eax, %esi, %eax +; POSTRA-NEXT: movl (%rdi,%rax,4), %ecx +; POSTRA-NEXT: btrl %esi, %ecx +; POSTRA-NEXT: movq 8(%rdi), %rsi +; POSTRA-NEXT: orq 40(%rdi), %rsi +; POSTRA-NEXT: orl %ecx, %edx +; POSTRA-NEXT: movl %edx, (%rdi,%rax,4) +; POSTRA-NEXT: movq 16(%rdi), %rax +; POSTRA-NEXT: movq 24(%rdi), %rdx +; POSTRA-NEXT: orq 56(%rdi), %rdx +; POSTRA-NEXT: orq 48(%rdi), %rax +; POSTRA-NEXT: movq (%rdi), %rcx +; POSTRA-NEXT: orq 32(%rdi), %rcx +; POSTRA-NEXT: orq %rdx, %rsi +; POSTRA-NEXT: orq %rax, %rcx +; POSTRA-NEXT: orq %rsi, %rcx +; POSTRA-NEXT: setne %al +; POSTRA-NEXT: retq +; +; NOPOSTRA-LABEL: PR166744: +; NOPOSTRA: # %bb.0: +; NOPOSTRA-NEXT: movl %esi, %eax +; NOPOSTRA-NEXT: shrl $3, %eax +; NOPOSTRA-NEXT: andl $60, %eax +; NOPOSTRA-NEXT: movl (%rdi,%rax), %ecx +; NOPOSTRA-NEXT: btrl %esi, %ecx +; NOPOSTRA-NEXT: shlxl %esi, %edx, %edx +; NOPOSTRA-NEXT: orl %ecx, %edx +; NOPOSTRA-NEXT: movl %edx, (%rdi,%rax) +; NOPOSTRA-NEXT: movq 16(%rdi), %rax +; NOPOSTRA-NEXT: movq (%rdi), %rcx +; NOPOSTRA-NEXT: movq 8(%rdi), %rdx +; NOPOSTRA-NEXT: movq 24(%rdi), %rsi +; NOPOSTRA-NEXT: orq 56(%rdi), %rsi +; NOPOSTRA-NEXT: orq 40(%rdi), %rdx +; NOPOSTRA-NEXT: orq 48(%rdi), %rax +; NOPOSTRA-NEXT: orq 32(%rdi), %rcx +; NOPOSTRA-NEXT: orq %rsi, %rdx +; NOPOSTRA-NEXT: orq %rax, %rcx +; NOPOSTRA-NEXT: orq %rdx, %rcx +; NOPOSTRA-NEXT: setne %al +; NOPOSTRA-NEXT: retq + %rem = and i64 %idx, 511 + %sh_prom = zext nneg i64 %rem to i512 + %shl = shl nuw i512 1, %sh_prom + %not = xor i512 %shl, -1 + %load = load i512, ptr %v, align 8 + %and = and i512 %load, %not + %conv2 = zext i1 %b to i512 + %shl4 = shl nuw i512 %conv2, %sh_prom + %or = or i512 %and, %shl4 + store i512 %or, ptr %v, align 8 + %cmp = icmp ne i512 %or, 0 + ret i1 %cmp +} diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s index 78e4f86..ff0dfb3 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_err.s +++ b/llvm/test/MC/AMDGPU/gfx90a_err.s @@ -674,46 +674,3 @@ v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. -// nv bit in FLAT instructions -flat_load_ubyte v5, v[2:3] offset:4095 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -flat_load_ubyte a5, v[2:3] offset:4095 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -flat_store_dword v[2:3], v5 offset:4095 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -flat_store_dword v[2:3], a5 offset:4095 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -flat_atomic_add_f64 v[0:1], v[2:3] offset:4095 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_load_ubyte v5, v[2:3], off offset:-1 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_store_byte v[2:3], v5, off offset:-1 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_atomic_add v[2:3], v5, off nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_atomic_swap a1, v[2:3], a2, off glc nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_atomic_swap_x2 v[2:3], v[4:5], off nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_atomic_swap_x2 v[2:3], a[4:5], off nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -scratch_load_ubyte v5, off, s2 offset:-1 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -scratch_load_ubyte a5, off, s2 offset:-1 nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -scratch_store_dword v2, v3, off nv -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - diff --git a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s index 3af0d83..c96a72d 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s +++ b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s @@ -706,107 +706,107 @@ flat_load_short_d16_hi a5, v[2:3] offset:4095 glc flat_load_short_d16_hi a5, v[2:3] offset:4095 slc // GFX90A: flat_atomic_swap a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x01,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_swap a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_cmpswap a0, v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x05,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_cmpswap a0, v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_add a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x09,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_add a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_sub a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x0d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_sub a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_smin a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x11,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_smin a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_umin a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x15,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_umin a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_smax a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x19,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_smax a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_umax a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x1d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_umax a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_and a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x21,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_and a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_or a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x25,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_or a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_xor a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x29,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_xor a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_inc a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x2d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_inc a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_dec a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x31,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_dec a0, v[2:3], a2 offset:4095 glc // GFX90A: flat_atomic_swap_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x81,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_swap_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_cmpswap_x2 a[0:1], v[2:3], a[2:5] offset:4095 glc ; encoding: [0xff,0x0f,0x85,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_cmpswap_x2 a[0:1], v[2:3], a[2:5] offset:4095 glc // GFX90A: flat_atomic_add_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x89,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_add_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_sub_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x8d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_sub_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_smin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x91,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_smin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_umin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x95,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_umin_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_smax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x99,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_smax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_umax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x9d,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_umax_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_and_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa1,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_and_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_or_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa5,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_or_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_xor_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xa9,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_xor_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_inc_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xad,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_inc_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_dec_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0xb1,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU flat_atomic_dec_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_swap v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x00,0xdd,0x02,0x02,0x80,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx942_err.s b/llvm/test/MC/AMDGPU/gfx942_err.s index dc51bab..fd59a01 100644 --- a/llvm/test/MC/AMDGPU/gfx942_err.s +++ b/llvm/test/MC/AMDGPU/gfx942_err.s @@ -125,31 +125,3 @@ global_load_dword v[2:3], off lds scratch_load_dword v2, off lds // GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -// nv bit in FLAT instructions -flat_load_ubyte v5, v[2:3] offset:4095 nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -flat_store_dword v[2:3], v5 offset:4095 nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -flat_atomic_add_f32 v[2:3], v5 nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_load_dword v2, v[2:3], off sc0 nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_store_dword v[2:3], v5 off sc0 nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_atomic_add_f64 v[0:1], v[2:3], off sc1 nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -global_atomic_swap v0, v[2:3], v5 off sc0 nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -scratch_load_lds_dword v2, off nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU - -scratch_store_dword v2, v3, off nv -// GFX942: :[[@LINE-1]]:{{[0-9]+}}: error: nv is not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s index 7687c0a..5cc3d25 100644 --- a/llvm/test/MC/AMDGPU/gfx9_asm_flat.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_flat.s @@ -24,18 +24,6 @@ flat_load_ubyte v5, v[1:2] offset:4095 glc flat_load_ubyte v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05] -flat_load_ubyte v5, v[1:2] nv -// CHECK: [0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05] - flat_load_sbyte v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05] @@ -60,18 +48,6 @@ flat_load_sbyte v5, v[1:2] offset:4095 glc flat_load_sbyte v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05] -flat_load_sbyte v5, v[1:2] nv -// CHECK: [0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05] - flat_load_ushort v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05] @@ -96,18 +72,6 @@ flat_load_ushort v5, v[1:2] offset:4095 glc flat_load_ushort v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05] -flat_load_ushort v5, v[1:2] nv -// CHECK: [0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ushort v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ushort v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ushort v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05] - flat_load_sshort v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05] @@ -132,18 +96,6 @@ flat_load_sshort v5, v[1:2] offset:4095 glc flat_load_sshort v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05] -flat_load_sshort v5, v[1:2] nv -// CHECK: [0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sshort v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sshort v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sshort v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05] - flat_load_dword v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05] @@ -168,18 +120,6 @@ flat_load_dword v5, v[1:2] offset:4095 glc flat_load_dword v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05] -flat_load_dword v5, v[1:2] nv -// CHECK: [0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dword v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dword v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dword v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05] - flat_load_dwordx2 v[5:6], v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05] @@ -204,18 +144,6 @@ flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05] -flat_load_dwordx2 v[5:6], v[1:2] nv -// CHECK: [0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx2 v[5:6], v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05] - flat_load_dwordx3 v[5:7], v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05] @@ -240,18 +168,6 @@ flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05] -flat_load_dwordx3 v[5:7], v[1:2] nv -// CHECK: [0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx3 v[5:7], v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05] - flat_load_dwordx4 v[5:8], v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05] @@ -276,18 +192,6 @@ flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05] -flat_load_dwordx4 v[5:8], v[1:2] nv -// CHECK: [0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx4 v[5:8], v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05] - -flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05] - flat_store_byte v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00] @@ -312,18 +216,6 @@ flat_store_byte v[1:2], v2 offset:4095 glc flat_store_byte v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00] -flat_store_byte v[1:2], v2 nv -// CHECK: [0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] - -flat_store_byte v[1:2], v2 offset:7 nv -// CHECK: [0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] - -flat_store_byte v[1:2], v2 offset:4095 glc nv -// CHECK: [0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00] - -flat_store_byte v[1:2], v2 offset:4095 slc nv -// CHECK: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00] - flat_store_byte_d16_hi v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00] @@ -348,18 +240,6 @@ flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00] -flat_store_byte_d16_hi v[1:2], v2 nv -// CHECK: [0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] - -flat_store_byte_d16_hi v[1:2], v2 offset:7 nv -// CHECK: [0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] - -flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc nv -// CHECK: [0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00] - -flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc nv -// CHECK: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00] - flat_store_short v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00] @@ -384,18 +264,6 @@ flat_store_short v[1:2], v2 offset:4095 glc flat_store_short v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00] -flat_store_short v[1:2], v2 nv -// CHECK: [0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] - -flat_store_short v[1:2], v2 offset:7 nv -// CHECK: [0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] - -flat_store_short v[1:2], v2 offset:4095 glc nv -// CHECK: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00] - -flat_store_short v[1:2], v2 offset:4095 slc nv -// CHECK: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00] - flat_store_short_d16_hi v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00] @@ -420,18 +288,6 @@ flat_store_short_d16_hi v[1:2], v2 offset:4095 glc flat_store_short_d16_hi v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00] -flat_store_short_d16_hi v[1:2], v2 nv -// CHECK: [0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] - -flat_store_short_d16_hi v[1:2], v2 offset:7 nv -// CHECK: [0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] - -flat_store_short_d16_hi v[1:2], v2 offset:4095 glc nv -// CHECK: [0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00] - -flat_store_short_d16_hi v[1:2], v2 offset:4095 slc nv -// CHECK: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00] - flat_store_dword v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00] @@ -456,18 +312,6 @@ flat_store_dword v[1:2], v2 offset:4095 glc flat_store_dword v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00] -flat_store_dword v[1:2], v2 nv -// CHECK: [0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dword v[1:2], v2 offset:7 nv -// CHECK: [0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dword v[1:2], v2 offset:4095 glc nv -// CHECK: [0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dword v[1:2], v2 offset:4095 slc nv -// CHECK: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00] - flat_store_dwordx2 v[1:2], v[2:3] offset:4095 // CHECK: [0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00] @@ -492,18 +336,6 @@ flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc // CHECK: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00] -flat_store_dwordx2 v[1:2], v[2:3] nv -// CHECK: [0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx2 v[1:2], v[2:3] offset:7 nv -// CHECK: [0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00] - flat_store_dwordx3 v[1:2], v[2:4] offset:4095 // CHECK: [0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00] @@ -528,18 +360,6 @@ flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc // CHECK: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00] -flat_store_dwordx3 v[1:2], v[2:4] nv -// CHECK: [0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx3 v[1:2], v[2:4] offset:7 nv -// CHECK: [0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00] - flat_store_dwordx4 v[1:2], v[2:5] offset:4095 // CHECK: [0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00] @@ -564,18 +384,6 @@ flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc // CHECK: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00] -flat_store_dwordx4 v[1:2], v[2:5] nv -// CHECK: [0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx4 v[1:2], v[2:5] offset:7 nv -// CHECK: [0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00] - -flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00] - flat_load_ubyte_d16 v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05] @@ -600,18 +408,6 @@ flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05] -flat_load_ubyte_d16 v5, v[1:2] nv -// CHECK: [0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte_d16 v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05] - flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05] @@ -636,18 +432,6 @@ flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05] -flat_load_ubyte_d16_hi v5, v[1:2] nv -// CHECK: [0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte_d16_hi v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05] - -flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05] - flat_load_sbyte_d16 v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05] @@ -672,18 +456,6 @@ flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05] -flat_load_sbyte_d16 v5, v[1:2] nv -// CHECK: [0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte_d16 v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05] - flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05] @@ -708,18 +480,6 @@ flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05] -flat_load_sbyte_d16_hi v5, v[1:2] nv -// CHECK: [0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte_d16_hi v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05] - -flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05] - flat_load_short_d16 v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05] @@ -744,18 +504,6 @@ flat_load_short_d16 v5, v[1:2] offset:4095 glc flat_load_short_d16 v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05] -flat_load_short_d16 v5, v[1:2] nv -// CHECK: [0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] - -flat_load_short_d16 v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] - -flat_load_short_d16 v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05] - -flat_load_short_d16 v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05] - flat_load_short_d16_hi v5, v[1:2] offset:4095 // CHECK: [0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05] @@ -780,18 +528,6 @@ flat_load_short_d16_hi v5, v[1:2] offset:4095 glc flat_load_short_d16_hi v5, v[1:2] offset:4095 slc // CHECK: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05] -flat_load_short_d16_hi v5, v[1:2] nv -// CHECK: [0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] - -flat_load_short_d16_hi v5, v[1:2] offset:7 nv -// CHECK: [0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] - -flat_load_short_d16_hi v5, v[1:2] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05] - -flat_load_short_d16_hi v5, v[1:2] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05] - flat_atomic_swap v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00] @@ -816,18 +552,6 @@ flat_atomic_swap v0, v[1:2], v2 offset:4095 glc flat_atomic_swap v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00] -flat_atomic_swap v[1:2], v2 nv -// CHECK: [0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_swap v[1:2], v2 offset:7 nv -// CHECK: [0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_swap v0, v[1:2], v2 offset:4095 glc nv -// CHECK: [0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_swap v[1:2], v2 offset:4095 slc nv -// CHECK: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00] - flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 // CHECK: [0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00] @@ -852,18 +576,6 @@ flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc // CHECK: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00] -flat_atomic_cmpswap v[1:2], v[2:3] nv -// CHECK: [0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_cmpswap v[1:2], v[2:3] offset:7 nv -// CHECK: [0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc nv -// CHECK: [0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc nv -// CHECK: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00] - flat_atomic_add v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00] @@ -888,18 +600,6 @@ flat_atomic_add v0, v[1:2], v2 offset:4095 glc flat_atomic_add v[1:2], v2 offset:4095 slc // CHECK: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00] -flat_atomic_add v[1:2], v2 nv -// CHECK: [0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_add v[1:2], v2 offset:7 nv -// CHECK: [0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_add v0, v[1:2], v2 offset:4095 glc nv -// CHECK: [0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00] - -flat_atomic_add v[1:2], v2 offset:4095 slc nv -// CHECK: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00] - flat_atomic_sub v[1:2], v2 offset:4095 // CHECK: [0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00] @@ -1497,18 +1197,6 @@ global_load_ubyte v5, v1, s[4:5] offset:-1 glc global_load_ubyte v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x04,0x05] -global_load_ubyte v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05] - global_load_sbyte v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x04,0x05] @@ -1554,18 +1242,6 @@ global_load_sbyte v5, v1, s[4:5] offset:-1 glc global_load_sbyte v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x04,0x05] -global_load_sbyte v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05] - global_load_ushort v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x04,0x05] @@ -1611,18 +1287,6 @@ global_load_ushort v5, v1, s[4:5] offset:-1 glc global_load_ushort v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x04,0x05] -global_load_ushort v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05] - -global_load_ushort v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05] - -global_load_ushort v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05] - -global_load_ushort v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05] - global_load_sshort v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x04,0x05] @@ -1668,18 +1332,6 @@ global_load_sshort v5, v1, s[4:5] offset:-1 glc global_load_sshort v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x04,0x05] -global_load_sshort v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05] - -global_load_sshort v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05] - -global_load_sshort v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05] - -global_load_sshort v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05] - global_load_dword v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x04,0x05] @@ -1725,18 +1377,6 @@ global_load_dword v5, v1, s[4:5] offset:-1 glc global_load_dword v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x04,0x05] -global_load_dword v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05] - -global_load_dword v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05] - -global_load_dword v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05] - -global_load_dword v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05] - global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x04,0x05] @@ -1782,18 +1422,6 @@ global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x04,0x05] -global_load_dwordx2 v[5:6], v1, s[4:5] nv -// CHECK: [0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05] - -global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05] - -global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05] - -global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05] - global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x04,0x05] @@ -1839,15 +1467,6 @@ global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x04,0x05] -global_load_dwordx3 v[5:7], v1, s[4:5] nv -// CHECK: [0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05] -global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05] -global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05] -global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05] - global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x04,0x05] @@ -1893,15 +1512,6 @@ global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x04,0x05] -global_load_dwordx4 v[5:8], v1, s[4:5] nv -// CHECK: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05] -global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05] -global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05] -global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05] - global_store_byte v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x06,0x00] @@ -1947,18 +1557,6 @@ global_store_byte v1, v2, s[6:7] offset:-1 glc global_store_byte v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x06,0x00] -global_store_byte v1, v2, s[6:7] nv -// CHECK: [0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00] - -global_store_byte v1, v2, s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00] - -global_store_byte v1, v2, s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00] - -global_store_byte v1, v2, s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00] - global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x06,0x00] @@ -2004,18 +1602,6 @@ global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x06,0x00] -global_store_byte_d16_hi v1, v2, s[6:7] nv -// CHECK: [0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00] - -global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00] - -global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00] - -global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00] - global_store_short v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x06,0x00] @@ -2061,18 +1647,6 @@ global_store_short v1, v2, s[6:7] offset:-1 glc global_store_short v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x06,0x00] -global_store_short v1, v2, s[6:7] nv -// CHECK: [0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00] - -global_store_short v1, v2, s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00] - -global_store_short v1, v2, s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00] - -global_store_short v1, v2, s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00] - global_store_short_d16_hi v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x06,0x00] @@ -2118,18 +1692,6 @@ global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x06,0x00] -global_store_short_d16_hi v1, v2, s[6:7] nv -// CHECK: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00] - -global_store_short_d16_hi v1, v2, s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00] - -global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00] - -global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00] - global_store_dword v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x06,0x00] @@ -2175,18 +1737,6 @@ global_store_dword v1, v2, s[6:7] offset:-1 glc global_store_dword v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x06,0x00] -global_store_dword v1, v2, s[6:7] nv -// CHECK: [0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00] - -global_store_dword v1, v2, s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00] - -global_store_dword v1, v2, s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00] - -global_store_dword v1, v2, s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00] - global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x06,0x00] @@ -2232,18 +1782,6 @@ global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x06,0x00] -global_store_dwordx2 v1, v[2:3], s[6:7] nv -// CHECK: [0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00] - global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x06,0x00] @@ -2289,18 +1827,6 @@ global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x06,0x00] -global_store_dwordx3 v1, v[2:4], s[6:7] nv -// CHECK: [0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00] - global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x06,0x00] @@ -2346,18 +1872,6 @@ global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x06,0x00] -global_store_dwordx4 v1, v[2:5], s[6:7] nv -// CHECK: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00] - -global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00] - global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x04,0x05] @@ -2403,18 +1917,6 @@ global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x04,0x05] -global_load_ubyte_d16 v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05] - global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x04,0x05] @@ -2460,18 +1962,6 @@ global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x04,0x05] -global_load_ubyte_d16_hi v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05] - -global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05] - global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x04,0x05] @@ -2517,18 +2007,6 @@ global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x04,0x05] -global_load_sbyte_d16 v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05] - global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x04,0x05] @@ -2574,18 +2052,6 @@ global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x04,0x05] -global_load_sbyte_d16_hi v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05] - -global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05] - global_load_short_d16 v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x04,0x05] @@ -2631,18 +2097,6 @@ global_load_short_d16 v5, v1, s[4:5] offset:-1 glc global_load_short_d16 v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x04,0x05] -global_load_short_d16 v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05] - -global_load_short_d16 v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05] - -global_load_short_d16 v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05] - -global_load_short_d16 v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05] - global_load_short_d16_hi v5, v1, s[4:5] offset:-1 // CHECK: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x04,0x05] @@ -2688,18 +2142,6 @@ global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc // CHECK: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x04,0x05] -global_load_short_d16_hi v5, v1, s[4:5] nv -// CHECK: [0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05] - -global_load_short_d16_hi v5, v1, s[4:5] offset:-1 nv -// CHECK: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05] - -global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05] - -global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05] - global_atomic_swap v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x06,0x00] @@ -2745,18 +2187,6 @@ global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc global_atomic_swap v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x06,0x00] -global_atomic_swap v1, v2, s[6:7] nv -// CHECK: [0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_swap v1, v2, s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_swap v1, v2, s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00] - global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x06,0x00] @@ -2802,18 +2232,6 @@ global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x06,0x00] -global_atomic_cmpswap v1, v[2:3], s[6:7] nv -// CHECK: [0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00] - global_atomic_add v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x06,0x00] @@ -2859,18 +2277,6 @@ global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc global_atomic_add v1, v2, s[6:7] offset:-1 slc // CHECK: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x06,0x00] -global_atomic_add v1, v2, s[6:7] nv -// CHECK: [0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_add v1, v2, s[6:7] offset:-1 nv -// CHECK: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc nv -// CHECK: [0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00] - -global_atomic_add v1, v2, s[6:7] offset:-1 slc nv -// CHECK: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00] - global_atomic_sub v1, v2, s[6:7] offset:-1 // CHECK: [0xff,0x9f,0x0c,0xdd,0x01,0x02,0x06,0x00] @@ -3951,18 +3357,6 @@ scratch_load_ubyte v5, off, s2 offset:-1 glc scratch_load_ubyte v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05] -scratch_load_ubyte v5, off, s2 nv -// CHECK: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05] - scratch_load_sbyte v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05] @@ -4008,18 +3402,6 @@ scratch_load_sbyte v5, off, s2 offset:-1 glc scratch_load_sbyte v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05] -scratch_load_sbyte v5, off, s2 nv -// CHECK: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05] - scratch_load_ushort v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05] @@ -4065,18 +3447,6 @@ scratch_load_ushort v5, off, s2 offset:-1 glc scratch_load_ushort v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05] -scratch_load_ushort v5, off, s2 nv -// CHECK: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ushort v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ushort v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ushort v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05] - scratch_load_sshort v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05] @@ -4122,18 +3492,6 @@ scratch_load_sshort v5, off, s2 offset:-1 glc scratch_load_sshort v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05] -scratch_load_sshort v5, off, s2 nv -// CHECK: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sshort v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sshort v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sshort v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05] - scratch_load_dword v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05] @@ -4179,18 +3537,6 @@ scratch_load_dword v5, off, s2 offset:-1 glc scratch_load_dword v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05] -scratch_load_dword v5, off, s2 nv -// CHECK: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dword v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dword v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dword v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05] - scratch_load_dwordx2 v[5:6], off, s2 offset:-1 // CHECK: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05] @@ -4236,18 +3582,6 @@ scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05] -scratch_load_dwordx2 v[5:6], off, s2 nv -// CHECK: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx2 v[5:6], off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05] - scratch_load_dwordx3 v[5:7], off, s2 offset:-1 // CHECK: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05] @@ -4293,18 +3627,6 @@ scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05] -scratch_load_dwordx3 v[5:7], off, s2 nv -// CHECK: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx3 v[5:7], off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05] - scratch_load_dwordx4 v[5:8], off, s2 offset:-1 // CHECK: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05] @@ -4350,18 +3672,6 @@ scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05] -scratch_load_dwordx4 v[5:8], off, s2 nv -// CHECK: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx4 v[5:8], off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05] - scratch_store_byte off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00] @@ -4407,18 +3717,6 @@ scratch_store_byte off, v2, s3 offset:-1 glc scratch_store_byte off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00] -scratch_store_byte off, v2, s3 nv -// CHECK: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_byte off, v2, s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_byte off, v2, s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_byte off, v2, s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00] - scratch_store_byte_d16_hi off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00] @@ -4464,18 +3762,6 @@ scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00] -scratch_store_byte_d16_hi off, v2, s3 nv -// CHECK: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_byte_d16_hi off, v2, s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00] - scratch_store_short off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00] @@ -4521,18 +3807,6 @@ scratch_store_short off, v2, s3 offset:-1 glc scratch_store_short off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00] -scratch_store_short off, v2, s3 nv -// CHECK: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_short off, v2, s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_short off, v2, s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_short off, v2, s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00] - scratch_store_short_d16_hi off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00] @@ -4578,18 +3852,6 @@ scratch_store_short_d16_hi off, v2, s3 offset:-1 glc scratch_store_short_d16_hi off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00] -scratch_store_short_d16_hi off, v2, s3 nv -// CHECK: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_short_d16_hi off, v2, s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_short_d16_hi off, v2, s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_short_d16_hi off, v2, s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00] - scratch_store_dword off, v2, s3 offset:-1 // CHECK: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00] @@ -4635,18 +3897,6 @@ scratch_store_dword off, v2, s3 offset:-1 glc scratch_store_dword off, v2, s3 offset:-1 slc // CHECK: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00] -scratch_store_dword off, v2, s3 nv -// CHECK: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dword off, v2, s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dword off, v2, s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dword off, v2, s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00] - scratch_store_dwordx2 off, v[2:3], s3 offset:-1 // CHECK: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00] @@ -4692,18 +3942,6 @@ scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc // CHECK: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00] -scratch_store_dwordx2 off, v[2:3], s3 nv -// CHECK: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx2 off, v[2:3], s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00] - scratch_store_dwordx3 off, v[2:4], s3 offset:-1 // CHECK: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00] @@ -4749,18 +3987,6 @@ scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc // CHECK: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00] -scratch_store_dwordx3 off, v[2:4], s3 nv -// CHECK: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx3 off, v[2:4], s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00] - scratch_store_dwordx4 off, v[2:5], s3 offset:-1 // CHECK: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00] @@ -4806,18 +4032,6 @@ scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc // CHECK: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00] -scratch_store_dwordx4 off, v[2:5], s3 nv -// CHECK: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx4 off, v[2:5], s3 offset:-1 nv -// CHECK: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00] - -scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00] - scratch_load_ubyte_d16 v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05] @@ -4863,18 +4077,6 @@ scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05] -scratch_load_ubyte_d16 v5, off, s2 nv -// CHECK: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte_d16 v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05] - scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05] @@ -4920,18 +4122,6 @@ scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05] -scratch_load_ubyte_d16_hi v5, off, s2 nv -// CHECK: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05] - scratch_load_sbyte_d16 v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05] @@ -4977,18 +4167,6 @@ scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05] -scratch_load_sbyte_d16 v5, off, s2 nv -// CHECK: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte_d16 v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05] - scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05] @@ -5034,18 +4212,6 @@ scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05] -scratch_load_sbyte_d16_hi v5, off, s2 nv -// CHECK: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05] - scratch_load_short_d16 v5, off, s2 offset:-1 // CHECK: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05] @@ -5088,18 +4254,6 @@ scratch_load_short_d16 v5, off, s2 offset:-4096 scratch_load_short_d16 v5, off, s2 offset:-1 glc // CHECK: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x02,0x05] -scratch_load_short_d16 v5, off, s2 nv -// CHECK: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_short_d16 v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_short_d16 v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_short_d16 v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05] - scratch_load_short_d16 v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05] @@ -5148,18 +4302,6 @@ scratch_load_short_d16_hi v5, off, s2 offset:-1 glc scratch_load_short_d16_hi v5, off, s2 offset:-1 slc // CHECK: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05] -scratch_load_short_d16_hi v5, off, s2 nv -// CHECK: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_short_d16_hi v5, off, s2 offset:-1 nv -// CHECK: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_short_d16_hi v5, off, s2 offset:-1 glc nv -// CHECK: [0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05] - -scratch_load_short_d16_hi v5, off, s2 offset:-1 slc nv -// CHECK: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05] - global_load_dword v[2:3], off lds // CHECK: [0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt index 4c06585..0ee659e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_flat.txt @@ -21,18 +21,6 @@ # CHECK: flat_load_ubyte v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x42,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_ubyte v5, v[1:2] nv ; encoding: [0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x40,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x40,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x41,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x42,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_sbyte v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x44,0xdc,0x01,0x00,0x00,0x05 @@ -54,18 +42,6 @@ # CHECK: flat_load_sbyte v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x46,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_sbyte v5, v[1:2] nv ; encoding: [0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x44,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x44,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x45,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x46,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_ushort v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x48,0xdc,0x01,0x00,0x00,0x05 @@ -87,18 +63,6 @@ # CHECK: flat_load_ushort v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x4a,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_ushort v5, v[1:2] nv ; encoding: [0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x48,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ushort v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x48,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ushort v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x49,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ushort v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x4a,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_sshort v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x4c,0xdc,0x01,0x00,0x00,0x05 @@ -120,18 +84,6 @@ # CHECK: flat_load_sshort v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x4e,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_sshort v5, v[1:2] nv ; encoding: [0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sshort v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x4c,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sshort v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x4d,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sshort v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x4e,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_dword v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x50,0xdc,0x01,0x00,0x00,0x05 @@ -153,18 +105,6 @@ # CHECK: flat_load_dword v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x52,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_dword v5, v[1:2] nv ; encoding: [0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x50,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dword v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x50,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dword v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x51,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dword v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x52,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x54,0xdc,0x01,0x00,0x00,0x05 @@ -186,18 +126,6 @@ # CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x56,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_dwordx2 v[5:6], v[1:2] nv ; encoding: [0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x54,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x54,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x55,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx2 v[5:6], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x56,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x58,0xdc,0x01,0x00,0x00,0x05 @@ -219,18 +147,6 @@ # CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x5a,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_dwordx3 v[5:7], v[1:2] nv ; encoding: [0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x58,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x58,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x59,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx3 v[5:7], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x5a,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x5c,0xdc,0x01,0x00,0x00,0x05 @@ -252,18 +168,6 @@ # CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x5e,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_dwordx4 v[5:8], v[1:2] nv ; encoding: [0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x5c,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x5d,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_dwordx4 v[5:8], v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x5e,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_store_byte v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x60,0xdc,0x01,0x02,0x00,0x00 @@ -285,18 +189,6 @@ # CHECK: flat_store_byte v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x62,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_byte v[1:2], v2 nv ; encoding: [0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x60,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_byte v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x60,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_byte v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x61,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_byte v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x62,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x64,0xdc,0x01,0x02,0x00,0x00 @@ -318,18 +210,6 @@ # CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x66,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_byte_d16_hi v[1:2], v2 nv ; encoding: [0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x64,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x64,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x65,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_byte_d16_hi v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x66,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_store_short v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x68,0xdc,0x01,0x02,0x00,0x00 @@ -351,18 +231,6 @@ # CHECK: flat_store_short v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x6a,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_short v[1:2], v2 nv ; encoding: [0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x68,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_short v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x68,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_short v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x69,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_short v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x6a,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x6c,0xdc,0x01,0x02,0x00,0x00 @@ -384,18 +252,6 @@ # CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x6e,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_short_d16_hi v[1:2], v2 nv ; encoding: [0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x6c,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x6d,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_short_d16_hi v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x6e,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_store_dword v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x70,0xdc,0x01,0x02,0x00,0x00 @@ -417,18 +273,6 @@ # CHECK: flat_store_dword v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x72,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_dword v[1:2], v2 nv ; encoding: [0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x70,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dword v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x70,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dword v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x71,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dword v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x72,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x74,0xdc,0x01,0x02,0x00,0x00 @@ -450,18 +294,6 @@ # CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x76,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_dwordx2 v[1:2], v[2:3] nv ; encoding: [0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x74,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:7 nv ; encoding: [0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x74,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 glc nv ; encoding: [0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x75,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx2 v[1:2], v[2:3] offset:4095 slc nv ; encoding: [0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x76,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 ; encoding: [0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x78,0xdc,0x01,0x02,0x00,0x00 @@ -483,18 +315,6 @@ # CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc ; encoding: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x7a,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_dwordx3 v[1:2], v[2:4] nv ; encoding: [0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x78,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:7 nv ; encoding: [0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x78,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 glc nv ; encoding: [0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x79,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx3 v[1:2], v[2:4] offset:4095 slc nv ; encoding: [0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x7a,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 ; encoding: [0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x7c,0xdc,0x01,0x02,0x00,0x00 @@ -516,18 +336,6 @@ # CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc ; encoding: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00] 0xff,0x0f,0x7e,0xdc,0x01,0x02,0x00,0x00 -# CHECK: flat_store_dwordx4 v[1:2], v[2:5] nv ; encoding: [0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] -0x00,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:7 nv ; encoding: [0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00] -0x07,0x00,0x7c,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 glc nv ; encoding: [0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x7d,0xdc,0x01,0x02,0x80,0x00 - -# CHECK: flat_store_dwordx4 v[1:2], v[2:5] offset:4095 slc nv ; encoding: [0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00] -0xff,0x0f,0x7e,0xdc,0x01,0x02,0x80,0x00 - # CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x80,0xdc,0x01,0x00,0x00,0x05 @@ -549,18 +357,6 @@ # CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x82,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_ubyte_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x80,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x80,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x81,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x82,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x84,0xdc,0x01,0x00,0x00,0x05 @@ -582,18 +378,6 @@ # CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x86,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x84,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x84,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x85,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_ubyte_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x86,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x88,0xdc,0x01,0x00,0x00,0x05 @@ -615,18 +399,6 @@ # CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x8a,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_sbyte_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x88,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x88,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x89,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x8a,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x8c,0xdc,0x01,0x00,0x00,0x05 @@ -648,18 +420,6 @@ # CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x8e,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x8c,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x8d,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_sbyte_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x8e,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x90,0xdc,0x01,0x00,0x00,0x05 @@ -681,18 +441,6 @@ # CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x92,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_short_d16 v5, v[1:2] nv ; encoding: [0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x90,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_short_d16 v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x90,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x91,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_short_d16 v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x92,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 ; encoding: [0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x94,0xdc,0x01,0x00,0x00,0x05 @@ -714,18 +462,6 @@ # CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 slc ; encoding: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05] 0xff,0x0f,0x96,0xdc,0x01,0x00,0x00,0x05 -# CHECK: flat_load_short_d16_hi v5, v[1:2] nv ; encoding: [0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] -0x00,0x00,0x94,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:7 nv ; encoding: [0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05] -0x07,0x00,0x94,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 glc nv ; encoding: [0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x95,0xdc,0x01,0x00,0x80,0x05 - -# CHECK: flat_load_short_d16_hi v5, v[1:2] offset:4095 slc nv ; encoding: [0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05] -0xff,0x0f,0x96,0xdc,0x01,0x00,0x80,0x05 - # CHECK: flat_atomic_swap v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x00,0xdd,0x01,0x02,0x00,0x00 @@ -747,18 +483,6 @@ # CHECK: flat_atomic_swap v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x02,0xdd,0x01,0x02,0x00,0x00 -# CHECK: flat_atomic_swap v[1:2], v2 nv ; encoding: [0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] -0x00,0x00,0x00,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_swap v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00] -0x07,0x00,0x00,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_swap v0, v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00] -0xff,0x0f,0x01,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_swap v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00] -0xff,0x0f,0x02,0xdd,0x01,0x02,0x80,0x00 - # CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x04,0xdd,0x01,0x02,0x00,0x00 @@ -780,18 +504,6 @@ # CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x06,0xdd,0x01,0x02,0x00,0x00 -# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] nv ; encoding: [0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] -0x00,0x00,0x04,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:7 nv ; encoding: [0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00] -0x07,0x00,0x04,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_cmpswap v0, v[1:2], v[2:3] offset:4095 glc nv ; encoding: [0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00] -0xff,0x0f,0x05,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_cmpswap v[1:2], v[2:3] offset:4095 slc nv ; encoding: [0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00] -0xff,0x0f,0x06,0xdd,0x01,0x02,0x80,0x00 - # CHECK: flat_atomic_add v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x08,0xdd,0x01,0x02,0x00,0x00 @@ -813,18 +525,6 @@ # CHECK: flat_atomic_add v[1:2], v2 offset:4095 slc ; encoding: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x0a,0xdd,0x01,0x02,0x00,0x00 -# CHECK: flat_atomic_add v[1:2], v2 nv ; encoding: [0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] -0x00,0x00,0x08,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_add v[1:2], v2 offset:7 nv ; encoding: [0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00] -0x07,0x00,0x08,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_add v0, v[1:2], v2 offset:4095 glc nv ; encoding: [0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00] -0xff,0x0f,0x09,0xdd,0x01,0x02,0x80,0x00 - -# CHECK: flat_atomic_add v[1:2], v2 offset:4095 slc nv ; encoding: [0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00] -0xff,0x0f,0x0a,0xdd,0x01,0x02,0x80,0x00 - # CHECK: flat_atomic_sub v[1:2], v2 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00] 0xff,0x0f,0x0c,0xdd,0x01,0x02,0x00,0x00 @@ -1317,18 +1017,6 @@ # CHECK: global_load_ubyte v5, v[1:2], off ; encoding: [0x00,0x80,0x40,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x40,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_ubyte v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x40,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x40,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x41,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x42,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_sbyte v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x44,0xdc,0x01,0x00,0x7f,0x05 @@ -1338,18 +1026,6 @@ # CHECK: global_load_sbyte v5, v[1:2], off ; encoding: [0x00,0x80,0x44,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x44,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_sbyte v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x44,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x44,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x45,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x46,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_ushort v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x48,0xdc,0x01,0x00,0x7f,0x05 @@ -1359,18 +1035,6 @@ # CHECK: global_load_ushort v5, v[1:2], off ; encoding: [0x00,0x80,0x48,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x48,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_ushort v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x48,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x48,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x49,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ushort v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x4a,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_sshort v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x4c,0xdc,0x01,0x00,0x7f,0x05 @@ -1380,18 +1044,6 @@ # CHECK: global_load_sshort v5, v[1:2], off ; encoding: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x4c,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_sshort v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x4c,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x4c,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x4d,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sshort v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x4e,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_dword v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x50,0xdc,0x01,0x00,0x7f,0x05 @@ -1401,18 +1053,6 @@ # CHECK: global_load_dword v5, v[1:2], off ; encoding: [0x00,0x80,0x50,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x50,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_dword v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x50,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x50,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x51,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dword v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x52,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_dwordx2 v[5:6], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x54,0xdc,0x01,0x00,0x7f,0x05 @@ -1422,18 +1062,6 @@ # CHECK: global_load_dwordx2 v[5:6], v[1:2], off ; encoding: [0x00,0x80,0x54,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x54,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] nv ; encoding: [0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x54,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x54,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x55,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx2 v[5:6], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x56,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_dwordx3 v[5:7], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x58,0xdc,0x01,0x00,0x7f,0x05 @@ -1443,18 +1071,6 @@ # CHECK: global_load_dwordx3 v[5:7], v[1:2], off ; encoding: [0x00,0x80,0x58,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x58,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] nv ; encoding: [0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x58,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x58,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x59,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx3 v[5:7], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x5a,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_dwordx4 v[5:8], v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x5c,0xdc,0x01,0x00,0x7f,0x05 @@ -1464,18 +1080,6 @@ # CHECK: global_load_dwordx4 v[5:8], v[1:2], off ; encoding: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x5c,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] nv ; encoding: [0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x5c,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x5c,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x5d,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_dwordx4 v[5:8], v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x5e,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_store_byte v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x60,0xdc,0x01,0x02,0x7f,0x00 @@ -1485,18 +1089,6 @@ # CHECK: global_store_byte v[1:2], v2, off ; encoding: [0x00,0x80,0x60,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x60,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_byte v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x60,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x60,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x61,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_byte v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x62,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_store_byte_d16_hi v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x64,0xdc,0x01,0x02,0x7f,0x00 @@ -1506,18 +1098,6 @@ # CHECK: global_store_byte_d16_hi v[1:2], v2, off ; encoding: [0x00,0x80,0x64,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x64,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x64,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x64,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x65,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_byte_d16_hi v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x66,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_store_short v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x68,0xdc,0x01,0x02,0x7f,0x00 @@ -1527,18 +1107,6 @@ # CHECK: global_store_short v[1:2], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x68,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_short v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x68,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_short v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x68,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_short v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x69,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_short v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x6a,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_store_short_d16_hi v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x6c,0xdc,0x01,0x02,0x7f,0x00 @@ -1548,18 +1116,6 @@ # CHECK: global_store_short_d16_hi v[1:2], v2, off ; encoding: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x6c,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_short_d16_hi v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x6c,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x6c,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x6d,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_short_d16_hi v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x6e,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_store_dword v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x70,0xdc,0x01,0x02,0x7f,0x00 @@ -1569,18 +1125,6 @@ # CHECK: global_store_dword v[1:2], v2, off ; encoding: [0x00,0x80,0x70,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x70,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_dword v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x70,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x70,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x71,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dword v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x72,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_store_dwordx2 v[1:2], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x74,0xdc,0x01,0x02,0x7f,0x00 @@ -1590,18 +1134,6 @@ # CHECK: global_store_dwordx2 v[1:2], v[2:3], off ; encoding: [0x00,0x80,0x74,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x74,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] nv ; encoding: [0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x74,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x74,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x75,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx2 v1, v[2:3], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x76,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_store_dwordx3 v[1:2], v[2:4], off offset:-1 ; encoding: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x78,0xdc,0x01,0x02,0x7f,0x00 @@ -1611,18 +1143,6 @@ # CHECK: global_store_dwordx3 v[1:2], v[2:4], off ; encoding: [0x00,0x80,0x78,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x78,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] nv ; encoding: [0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x78,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x78,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x79,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx3 v1, v[2:4], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x7a,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_store_dwordx4 v[1:2], v[2:5], off offset:-1 ; encoding: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x7c,0xdc,0x01,0x02,0x7f,0x00 @@ -1632,18 +1152,6 @@ # CHECK: global_store_dwordx4 v[1:2], v[2:5], off ; encoding: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x7f,0x00] 0x00,0x80,0x7c,0xdc,0x01,0x02,0x7f,0x00 -# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] nv ; encoding: [0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00] -0x00,0x80,0x7c,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x7c,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x7d,0xdc,0x01,0x02,0x86,0x00 - -# CHECK: global_store_dwordx4 v1, v[2:5], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00] -0xff,0x9f,0x7e,0xdc,0x01,0x02,0x86,0x00 - # CHECK: global_load_ubyte_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x80,0xdc,0x01,0x00,0x7f,0x05 @@ -1653,18 +1161,6 @@ # CHECK: global_load_ubyte_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x80,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x80,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x80,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x80,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x81,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x82,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_ubyte_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x84,0xdc,0x01,0x00,0x7f,0x05 @@ -1674,18 +1170,6 @@ # CHECK: global_load_ubyte_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x84,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x84,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x84,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x84,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x85,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_ubyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x86,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_sbyte_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x88,0xdc,0x01,0x00,0x7f,0x05 @@ -1695,18 +1179,6 @@ # CHECK: global_load_sbyte_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x88,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x88,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x88,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x88,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x89,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x8a,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_sbyte_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x8c,0xdc,0x01,0x00,0x7f,0x05 @@ -1716,18 +1188,6 @@ # CHECK: global_load_sbyte_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x8c,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x8c,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x8c,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x8d,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_sbyte_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x8e,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_short_d16 v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x90,0xdc,0x01,0x00,0x7f,0x05 @@ -1737,18 +1197,6 @@ # CHECK: global_load_short_d16 v5, v[1:2], off ; encoding: [0x00,0x80,0x90,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x90,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_short_d16 v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x90,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x90,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x91,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_short_d16 v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x92,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_load_short_d16_hi v5, v[1:2], off offset:-1 ; encoding: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x7f,0x05] 0xff,0x9f,0x94,0xdc,0x01,0x00,0x7f,0x05 @@ -1758,18 +1206,6 @@ # CHECK: global_load_short_d16_hi v5, v[1:2], off ; encoding: [0x00,0x80,0x94,0xdc,0x01,0x00,0x7f,0x05] 0x00,0x80,0x94,0xdc,0x01,0x00,0x7f,0x05 -# CHECK: global_load_short_d16_hi v5, v1, s[4:5] nv ; encoding: [0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05] -0x00,0x80,0x94,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 nv ; encoding: [0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x94,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 glc nv ; encoding: [0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x95,0xdc,0x01,0x00,0x84,0x05 - -# CHECK: global_load_short_d16_hi v5, v1, s[4:5] offset:-1 slc nv ; encoding: [0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05] -0xff,0x9f,0x96,0xdc,0x01,0x00,0x84,0x05 - # CHECK: global_atomic_swap v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x00,0xdd,0x01,0x02,0x7f,0x00 @@ -1779,18 +1215,6 @@ # CHECK: global_atomic_swap v[1:2], v2, off ; encoding: [0x00,0x80,0x00,0xdd,0x01,0x02,0x7f,0x00] 0x00,0x80,0x00,0xdd,0x01,0x02,0x7f,0x00 -# CHECK: global_atomic_swap v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00] -0x00,0x80,0x00,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_swap v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x00,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_swap v0, v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x01,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_swap v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x02,0xdd,0x01,0x02,0x86,0x00 - # CHECK: global_atomic_cmpswap v[1:2], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x04,0xdd,0x01,0x02,0x7f,0x00 @@ -1812,18 +1236,6 @@ # CHECK: global_atomic_cmpswap v1, v[2:3], v[4:5], off glc ; encoding: [0x00,0x80,0x05,0xdd,0x02,0x04,0x7f,0x01] 0x00,0x80,0x05,0xdd,0x02,0x04,0x7f,0x01 -# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] nv ; encoding: [0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00] -0x00,0x80,0x04,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x04,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_cmpswap v0, v1, v[2:3], s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x05,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_cmpswap v1, v[2:3], s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x06,0xdd,0x01,0x02,0x86,0x00 - # CHECK: global_atomic_add v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x08,0xdd,0x01,0x02,0x7f,0x00 @@ -1833,18 +1245,6 @@ # CHECK: global_atomic_add v[1:2], v2, off ; encoding: [0x00,0x80,0x08,0xdd,0x01,0x02,0x7f,0x00] 0x00,0x80,0x08,0xdd,0x01,0x02,0x7f,0x00 -# CHECK: global_atomic_add v1, v2, s[6:7] nv ; encoding: [0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00] -0x00,0x80,0x08,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_add v1, v2, s[6:7] offset:-1 nv ; encoding: [0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x08,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_add v0, v1, v2, s[6:7] offset:-1 glc nv ; encoding: [0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x09,0xdd,0x01,0x02,0x86,0x00 - -# CHECK: global_atomic_add v1, v2, s[6:7] offset:-1 slc nv ; encoding: [0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00] -0xff,0x9f,0x0a,0xdd,0x01,0x02,0x86,0x00 - # CHECK: global_atomic_sub v[1:2], v2, off offset:-1 ; encoding: [0xff,0x9f,0x0c,0xdd,0x01,0x02,0x7f,0x00] 0xff,0x9f,0x0c,0xdd,0x01,0x02,0x7f,0x00 @@ -2103,18 +1503,6 @@ # CHECK: scratch_load_ubyte v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x42,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_ubyte v5, off, s2 nv ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_sbyte v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x44,0xdc,0x00,0x00,0x02,0x05 @@ -2154,18 +1542,6 @@ # CHECK: scratch_load_sbyte v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x46,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_sbyte v5, off, s2 nv ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_ushort v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x48,0xdc,0x00,0x00,0x02,0x05 @@ -2205,18 +1581,6 @@ # CHECK: scratch_load_ushort v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x4a,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_ushort v5, off, s2 nv ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ushort v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ushort v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ushort v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_sshort v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x4c,0xdc,0x00,0x00,0x02,0x05 @@ -2256,18 +1620,6 @@ # CHECK: scratch_load_sshort v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x4e,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_sshort v5, off, s2 nv ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sshort v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sshort v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sshort v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_dword v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x50,0xdc,0x00,0x00,0x02,0x05 @@ -2307,18 +1659,6 @@ # CHECK: scratch_load_dword v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x52,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_dword v5, off, s2 nv ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dword v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dword v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dword v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x54,0xdc,0x00,0x00,0x02,0x05 @@ -2358,18 +1698,6 @@ # CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x56,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_dwordx2 v[5:6], off, s2 nv ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx2 v[5:6], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x58,0xdc,0x00,0x00,0x02,0x05 @@ -2409,18 +1737,6 @@ # CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x5a,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_dwordx3 v[5:7], off, s2 nv ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx3 v[5:7], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x5c,0xdc,0x00,0x00,0x02,0x05 @@ -2460,18 +1776,6 @@ # CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x5e,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_dwordx4 v[5:8], off, s2 nv ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_dwordx4 v[5:8], off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_store_byte off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x60,0xdc,0x00,0x02,0x03,0x00 @@ -2511,18 +1815,6 @@ # CHECK: scratch_store_byte off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x62,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_byte off, v2, s3 nv ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_byte off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_byte off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_byte off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x64,0xdc,0x00,0x02,0x03,0x00 @@ -2562,18 +1854,6 @@ # CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x66,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_byte_d16_hi off, v2, s3 nv ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_byte_d16_hi off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_store_short off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x68,0xdc,0x00,0x02,0x03,0x00 @@ -2613,18 +1893,6 @@ # CHECK: scratch_store_short off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x6a,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_short off, v2, s3 nv ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_short off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_short off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_short off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x6c,0xdc,0x00,0x02,0x03,0x00 @@ -2664,18 +1932,6 @@ # CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x6e,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_short_d16_hi off, v2, s3 nv ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_short_d16_hi off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_store_dword off, v2, s3 offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x70,0xdc,0x00,0x02,0x03,0x00 @@ -2715,18 +1971,6 @@ # CHECK: scratch_store_dword off, v2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x72,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_dword off, v2, s3 nv ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dword off, v2, s3 offset:-1 nv ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dword off, v2, s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dword off, v2, s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x74,0xdc,0x00,0x02,0x03,0x00 @@ -2766,18 +2010,6 @@ # CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x76,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_dwordx2 off, v[2:3], s3 nv ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx2 off, v[2:3], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x78,0xdc,0x00,0x02,0x03,0x00 @@ -2817,18 +2049,6 @@ # CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x7a,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_dwordx3 off, v[2:4], s3 nv ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx3 off, v[2:4], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x7c,0xdc,0x00,0x02,0x03,0x00 @@ -2868,18 +2088,6 @@ # CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00] 0xff,0x5f,0x7e,0xdc,0x00,0x02,0x03,0x00 -# CHECK: scratch_store_dwordx4 off, v[2:5], s3 nv ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00] -0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 nv ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 glc nv ; encoding: [0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00 - -# CHECK: scratch_store_dwordx4 off, v[2:5], s3 offset:-1 slc nv ; encoding: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00] -0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00 - # CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x80,0xdc,0x00,0x00,0x02,0x05 @@ -2919,18 +2127,6 @@ # CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x82,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_ubyte_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x84,0xdc,0x00,0x00,0x02,0x05 @@ -2970,18 +2166,6 @@ # CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x86,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_ubyte_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x88,0xdc,0x00,0x00,0x02,0x05 @@ -3021,18 +2205,6 @@ # CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x8a,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_sbyte_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x8c,0xdc,0x00,0x00,0x02,0x05 @@ -3072,18 +2244,6 @@ # CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x8e,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_sbyte_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x90,0xdc,0x00,0x00,0x02,0x05 @@ -3123,18 +2283,6 @@ # CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x92,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_short_d16 v5, off, s2 nv ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_short_d16 v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05 - # CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x94,0xdc,0x00,0x00,0x02,0x05 @@ -3174,18 +2322,6 @@ # CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05] 0xff,0x5f,0x96,0xdc,0x00,0x00,0x02,0x05 -# CHECK: scratch_load_short_d16_hi v5, off, s2 nv ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05] -0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 nv ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 glc nv ; encoding: [0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05 - -# CHECK: scratch_load_short_d16_hi v5, off, s2 offset:-1 slc nv ; encoding: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05] -0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05 - # CHECK: global_load_dword v[2:3], off lds ; encoding: [0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00] 0x00,0xa0,0x50,0xdc,0x02,0x00,0x7f,0x00 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_store.ll deleted file mode 100644 index bd5f4e2..0000000 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_store.ll +++ /dev/null @@ -1,117 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 -; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses=true -max-interleave-group-factor=16 -S < %s | FileCheck %s - -define dso_local void @_Z6unpackPhS_(ptr noalias noundef readonly captures(none) %in, ptr noalias noundef writeonly captures(none) %out) { -; CHECK-LABEL: define dso_local void @_Z6unpackPhS_( -; CHECK-SAME: ptr noalias noundef readonly captures(none) [[IN:%.*]], ptr noalias noundef writeonly captures(none) [[OUT:%.*]]) { -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[OUT]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 4 -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[IN]], i64 [[OFFSET_IDX2]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i8>, ptr [[NEXT_GEP3]], align 1, !alias.scope [[META0:![0-9]+]] -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12> -; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13> -; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14> -; CHECK-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <16 x i8> [[WIDE_VEC]], <16 x i8> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15> -; CHECK-NEXT: [[TMP0:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[STRIDED_VEC5]] -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[STRIDED_VEC4]] -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[STRIDED_VEC5]], [[STRIDED_VEC4]] -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[STRIDED_VEC]] -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i8> [[STRIDED_VEC5]], [[STRIDED_VEC]] -; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i8> [[STRIDED_VEC4]], [[STRIDED_VEC]] -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i8> [[STRIDED_VEC5]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i8> [[STRIDED_VEC6]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> zeroinitializer, <4 x i8> [[STRIDED_VEC6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[STRIDED_VEC5]], <4 x i8> [[TMP0]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[STRIDED_VEC4]], <4 x i8> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[STRIDED_VEC]], <4 x i8> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i8> [[TMP11]], <8 x i8> [[TMP12]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i8> [[TMP13]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP15]], <8 x i8> [[TMP16]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i8> [[TMP17]], <8 x i8> [[TMP18]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> -; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x i8> [[TMP19]], <16 x i8> [[TMP20]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP21]], <16 x i8> [[TMP22]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <32 x i8> [[TMP23]], <32 x i8> [[TMP24]], <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <64 x i8> [[TMP25]], <64 x i8> poison, <64 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60, i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63> -; CHECK-NEXT: store <64 x i8> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 1, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %vector.body, !llvm.loop [[LOOP5:![0-9]+]] -; -entry: - br label %for.body - -for.cond.cleanup: ; preds = %for.body - ret void - -for.body: ; preds = %entry, %for.body - %i.033 = phi i32 [ 0, %entry ], [ %inc17, %for.body ] - %out.addr.032 = phi ptr [ %out, %entry ], [ %add.ptr, %for.body ] - %in.addr.031 = phi ptr [ %in, %entry ], [ %add.ptr15, %for.body ] - store i8 0, ptr %out.addr.032, align 1 - %arrayidx10 = getelementptr inbounds nuw i8, ptr %in.addr.031, i64 3 - %0 = load i8, ptr %arrayidx10, align 1 - %arrayidx14 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 1 - store i8 %0, ptr %arrayidx14, align 1 - %arrayidx10.1 = getelementptr inbounds nuw i8, ptr %in.addr.031, i64 2 - %1 = load i8, ptr %arrayidx10.1, align 1 - %arrayidx14.1 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 2 - store i8 %1, ptr %arrayidx14.1, align 1 - %add.2 = add i8 %0, %1 - %arrayidx14.2 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 3 - store i8 %add.2, ptr %arrayidx14.2, align 1 - %arrayidx10.3 = getelementptr inbounds nuw i8, ptr %in.addr.031, i64 1 - %2 = load i8, ptr %arrayidx10.3, align 1 - %arrayidx14.3 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 4 - store i8 %2, ptr %arrayidx14.3, align 1 - %add.4 = add i8 %0, %2 - %arrayidx14.4 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 5 - store i8 %add.4, ptr %arrayidx14.4, align 1 - %add.5 = add i8 %1, %2 - %arrayidx14.5 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 6 - store i8 %add.5, ptr %arrayidx14.5, align 1 - %add.6 = add i8 %0, %add.5 - %arrayidx14.6 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 7 - store i8 %add.6, ptr %arrayidx14.6, align 1 - %3 = load i8, ptr %in.addr.031, align 1 - %arrayidx14.7 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 8 - store i8 %3, ptr %arrayidx14.7, align 1 - %add.8 = add i8 %0, %3 - %arrayidx14.8 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 9 - store i8 %add.8, ptr %arrayidx14.8, align 1 - %add.9 = add i8 %1, %3 - %arrayidx14.9 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 10 - store i8 %add.9, ptr %arrayidx14.9, align 1 - %add.10 = add i8 %0, %add.9 - %arrayidx14.10 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 11 - store i8 %add.10, ptr %arrayidx14.10, align 1 - %add.11 = add i8 %2, %3 - %arrayidx14.11 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 12 - store i8 %add.11, ptr %arrayidx14.11, align 1 - %add.12 = add i8 %0, %add.11 - %arrayidx14.12 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 13 - store i8 %add.12, ptr %arrayidx14.12, align 1 - %add.13 = add i8 %1, %add.11 - %arrayidx14.13 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 14 - store i8 %add.13, ptr %arrayidx14.13, align 1 - %add.14 = add i8 %0, %add.13 - %arrayidx14.14 = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 15 - store i8 %add.14, ptr %arrayidx14.14, align 1 - %add.ptr = getelementptr inbounds nuw i8, ptr %out.addr.032, i64 16 - %add.ptr15 = getelementptr inbounds nuw i8, ptr %in.addr.031, i64 4 - %inc17 = add nuw nsw i32 %i.033, 1 - %exitcond.not = icmp eq i32 %inc17, 32 - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 -} - -!0 = distinct !{!0, !1} -!1 = !{!"llvm.loop.mustprogress"} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll new file mode 100644 index 0000000..fe7f43f --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked_ldst_sme.ll @@ -0,0 +1,187 @@ +; RUN: opt < %s -passes=loop-vectorize -S | FileCheck %s +target triple = "aarch64-unknown-linux-gnu" + +define void @wombat(i32 %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %arg4, ptr %arg5, i8 %arg6) #0 { +; CHECK-LABEL: define void @wombat( +; CHECK-SAME: i32 [[ARG:%.*]], ptr [[ARG1:%.*]], ptr [[ARG2:%.*]], ptr [[ARG3:%.*]], ptr [[ARG4:%.*]], ptr [[ARG5:%.*]], i8 [[ARG6:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt i32 [[ARG]], 0 +; CHECK-NEXT: br i1 [[ICMP]], label %[[BB7:.*]], label %[[BB25:.*]] +; CHECK: [[BB7]]: +; CHECK-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[ARG]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[ARG1]], i64 [[ZEXT]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[ZEXT]] +; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[ARG5]], i64 [[ZEXT]] +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[ARG3]], i64 [[ZEXT]] +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[ARG4]], i64 [[ZEXT]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[ARG1]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[BOUND05:%.*]] = icmp ult ptr [[ARG1]], [[SCEVGEP2]] +; CHECK-NEXT: [[BOUND16:%.*]] = icmp ult ptr [[ARG5]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT7:%.*]] = and i1 [[BOUND05]], [[BOUND16]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT7]] +; CHECK-NEXT: [[BOUND08:%.*]] = icmp ult ptr [[ARG1]], [[SCEVGEP3]] +; CHECK-NEXT: [[BOUND19:%.*]] = icmp ult ptr [[ARG3]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT10:%.*]] = and i1 [[BOUND08]], [[BOUND19]] +; CHECK-NEXT: [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT10]] +; CHECK-NEXT: [[BOUND012:%.*]] = icmp ult ptr [[ARG1]], [[SCEVGEP4]] +; CHECK-NEXT: [[BOUND113:%.*]] = icmp ult ptr [[ARG4]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]] +; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX11]], [[FOUND_CONFLICT14]] +; CHECK-NEXT: [[BOUND016:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP2]] +; CHECK-NEXT: [[BOUND117:%.*]] = icmp ult ptr [[ARG5]], [[SCEVGEP1]] +; CHECK-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] +; CHECK-NEXT: [[CONFLICT_RDX19:%.*]] = or i1 [[CONFLICT_RDX15]], [[FOUND_CONFLICT18]] +; CHECK-NEXT: [[BOUND020:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP3]] +; CHECK-NEXT: [[BOUND121:%.*]] = icmp ult ptr [[ARG3]], [[SCEVGEP1]] +; CHECK-NEXT: [[FOUND_CONFLICT22:%.*]] = and i1 [[BOUND020]], [[BOUND121]] +; CHECK-NEXT: [[CONFLICT_RDX23:%.*]] = or i1 [[CONFLICT_RDX19]], [[FOUND_CONFLICT22]] +; CHECK-NEXT: [[BOUND024:%.*]] = icmp ult ptr [[ARG2]], [[SCEVGEP4]] +; CHECK-NEXT: [[BOUND125:%.*]] = icmp ult ptr [[ARG4]], [[SCEVGEP1]] +; CHECK-NEXT: [[FOUND_CONFLICT26:%.*]] = and i1 [[BOUND024]], [[BOUND125]] +; CHECK-NEXT: [[CONFLICT_RDX27:%.*]] = or i1 [[CONFLICT_RDX23]], [[FOUND_CONFLICT26]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX27]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[ARG6]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG5]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP4]], align 1, !alias.scope [[META0:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp uge <vscale x 16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[ARG1]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP6]], <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i8> poison), !alias.scope [[META3:![0-9]+]], !noalias [[META5:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[ARG3]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD28:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP7]], <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i8> poison), !alias.scope [[META9:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[ARG4]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD29:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP8]], <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i8> poison), !alias.scope [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD29]], [[WIDE_MASKED_LOAD28]] +; CHECK-NEXT: [[TMP10:%.*]] = add <vscale x 16 x i8> [[TMP9]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP10]], ptr align 1 [[TMP6]], <vscale x 16 x i1> [[TMP5]]), !alias.scope [[META3]], !noalias [[META5]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[ARG2]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP11]], <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i8> poison), !alias.scope [[META11:![0-9]+]], !noalias [[META12:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD28]], [[WIDE_MASKED_LOAD28]] +; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 16 x i8> [[WIDE_MASKED_LOAD30]], [[TMP12]] +; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP13]], ptr align 1 [[TMP11]], <vscale x 16 x i1> [[TMP5]]), !alias.scope [[META11]], !noalias [[META12]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[BB24:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[BB7]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label %[[BB8:.*]] +; CHECK: [[BB8]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD22:%.*]], %[[BB21:.*]] ] +; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG5]], i64 [[PHI]] +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GETELEMENTPTR]], align 1 +; CHECK-NEXT: [[ICMP9:%.*]] = icmp ult i8 [[LOAD]], [[ARG6]] +; CHECK-NEXT: br i1 [[ICMP9]], label %[[BB21]], label %[[BB10:.*]] +; CHECK: [[BB10]]: +; CHECK-NEXT: [[GETELEMENTPTR11:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG1]], i64 [[PHI]] +; CHECK-NEXT: [[LOAD12:%.*]] = load i8, ptr [[GETELEMENTPTR11]], align 1 +; CHECK-NEXT: [[GETELEMENTPTR13:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG3]], i64 [[PHI]] +; CHECK-NEXT: [[LOAD14:%.*]] = load i8, ptr [[GETELEMENTPTR13]], align 1 +; CHECK-NEXT: [[GETELEMENTPTR15:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG4]], i64 [[PHI]] +; CHECK-NEXT: [[LOAD16:%.*]] = load i8, ptr [[GETELEMENTPTR15]], align 1 +; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[LOAD16]], [[LOAD14]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[LOAD12]] +; CHECK-NEXT: store i8 [[ADD]], ptr [[GETELEMENTPTR11]], align 1 +; CHECK-NEXT: [[GETELEMENTPTR17:%.*]] = getelementptr inbounds nuw i8, ptr [[ARG2]], i64 [[PHI]] +; CHECK-NEXT: [[LOAD18:%.*]] = load i8, ptr [[GETELEMENTPTR17]], align 1 +; CHECK-NEXT: [[MUL19:%.*]] = mul i8 [[LOAD14]], [[LOAD14]] +; CHECK-NEXT: [[ADD20:%.*]] = add i8 [[LOAD18]], [[MUL19]] +; CHECK-NEXT: store i8 [[ADD20]], ptr [[GETELEMENTPTR17]], align 1 +; CHECK-NEXT: br label %[[BB21]] +; CHECK: [[BB21]]: +; CHECK-NEXT: [[ADD22]] = add nuw nsw i64 [[PHI]], 1 +; CHECK-NEXT: [[ICMP23:%.*]] = icmp eq i64 [[ADD22]], [[ZEXT]] +; CHECK-NEXT: br i1 [[ICMP23]], label %[[BB24]], label %[[BB8]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK: [[BB24]]: +; CHECK-NEXT: br label %[[BB25]] +; CHECK: [[BB25]]: +; CHECK-NEXT: ret void +; +bb: + %icmp = icmp sgt i32 %arg, 0 + br i1 %icmp, label %bb7, label %bb25 + +bb7: ; preds = %bb + %zext = zext nneg i32 %arg to i64 + br label %bb8 + +bb8: ; preds = %bb21, %bb7 + %phi = phi i64 [ 0, %bb7 ], [ %add22, %bb21 ] + %getelementptr = getelementptr inbounds nuw i8, ptr %arg5, i64 %phi + %load = load i8, ptr %getelementptr, align 1 + %icmp9 = icmp ult i8 %load, %arg6 + br i1 %icmp9, label %bb21, label %bb10 + +bb10: ; preds = %bb8 + %getelementptr11 = getelementptr inbounds nuw i8, ptr %arg1, i64 %phi + %load12 = load i8, ptr %getelementptr11, align 1 + %getelementptr13 = getelementptr inbounds nuw i8, ptr %arg3, i64 %phi + %load14 = load i8, ptr %getelementptr13, align 1 + %getelementptr15 = getelementptr inbounds nuw i8, ptr %arg4, i64 %phi + %load16 = load i8, ptr %getelementptr15, align 1 + %mul = mul i8 %load16, %load14 + %add = add i8 %mul, %load12 + store i8 %add, ptr %getelementptr11, align 1 + %getelementptr17 = getelementptr inbounds nuw i8, ptr %arg2, i64 %phi + %load18 = load i8, ptr %getelementptr17, align 1 + %mul19 = mul i8 %load14, %load14 + %add20 = add i8 %load18, %mul19 + store i8 %add20, ptr %getelementptr17, align 1 + br label %bb21 + +bb21: ; preds = %bb10, %bb8 + %add22 = add nuw nsw i64 %phi, 1 + %icmp23 = icmp eq i64 %add22, %zext + br i1 %icmp23, label %bb24, label %bb8, !llvm.loop !0 + +bb24: ; preds = %bb21 + br label %bb25 + +bb25: ; preds = %bb24, %bb + ret void +} + +attributes #0 = { uwtable vscale_range(1,16) "aarch64_pstate_sm_body" "target-features"="+fp-armv8,+neon,+sme,+v8a,-fmv" } + +!0 = distinct !{!0, !1, !2, !3, !4} +!1 = !{!"llvm.loop.mustprogress"} +!2 = !{!"llvm.loop.vectorize.width", i32 16} +!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} +!4 = !{!"llvm.loop.vectorize.enable", i1 true} +;. +; CHECK: [[META0]] = !{[[META1:![0-9]+]]} +; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]} +; CHECK: [[META2]] = distinct !{[[META2]], !"LVerDomain"} +; CHECK: [[META3]] = !{[[META4:![0-9]+]]} +; CHECK: [[META4]] = distinct !{[[META4]], [[META2]]} +; CHECK: [[META5]] = !{[[META6:![0-9]+]], [[META1]], [[META7:![0-9]+]], [[META8:![0-9]+]]} +; CHECK: [[META6]] = distinct !{[[META6]], [[META2]]} +; CHECK: [[META7]] = distinct !{[[META7]], [[META2]]} +; CHECK: [[META8]] = distinct !{[[META8]], [[META2]]} +; CHECK: [[META9]] = !{[[META7]]} +; CHECK: [[META10]] = !{[[META8]]} +; CHECK: [[META11]] = !{[[META6]]} +; CHECK: [[META12]] = !{[[META1]], [[META7]], [[META8]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META14:![0-9]+]], [[META15:![0-9]+]], [[META16:![0-9]+]]} +; CHECK: [[META14]] = !{!"llvm.loop.mustprogress"} +; CHECK: [[META15]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META16]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META14]], [[META15]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll index cdddcc9..68cfc65 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6 -; RUN: opt -p loop-vectorize -max-interleave-group-factor=4 -S %s | FileCheck %s +; RUN: opt -p loop-vectorize -S %s | FileCheck %s target triple = "arm64-apple-macosx15.0.0" diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll index 54b7f2a..f2ae327 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/interleave_vec.ll @@ -925,20 +925,20 @@ define void @same_op8_splat(ptr noalias noundef %a, ptr noundef %b, ptr noundef ; CHECK-SAME: ptr noalias noundef captures(none) [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]], ptr noundef readonly captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[C]], align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x float>, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_VEC19:%.*]] = load <32 x float>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <32 x float> [[WIDE_VEC]], [[TMP1]] -; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <32 x float> [[WIDE_VEC19]], [[TMP4]] -; CHECK-NEXT: store <32 x float> [[INTERLEAVED_VEC]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[WIDE_VEC19:%.*]] = load <16 x float>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <16 x float> [[WIDE_VEC]], [[TMP1]] +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = fadd fast <16 x float> [[WIDE_VEC19]], [[TMP4]] +; CHECK-NEXT: store <16 x float> [[INTERLEAVED_VEC]], ptr [[TMP6]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 144 ; CHECK-NEXT: br i1 [[TMP25]], label %[[FOR_END11:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[FOR_END11]]: diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll new file mode 100644 index 0000000..2926371 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=ASM +; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=MIR + +define i64 @test1(i64 %i) nounwind readnone { + %loc = alloca i64 + %j = load i64, ptr %loc + %r = add i64 %i, %j + ret i64 %r +} + +define i64 @test2(i32 %i) nounwind readnone { + %loc = alloca i32 + %j = load i32, ptr %loc + %r = add i32 %i, %j + %ext = zext i32 %r to i64 + ret i64 %ext +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected new file mode 100644 index 0000000..88cb03e --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_mixed.ll.expected @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=ASM +; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=MIR + +define i64 @test1(i64 %i) nounwind readnone { +; ASM-LABEL: test1: +; ASM: # %bb.0: +; ASM-NEXT: movq %rdi, %rax +; ASM-NEXT: addq -{{[0-9]+}}(%rsp), %rax +; ASM-NEXT: retq +; MIR-LABEL: name: test1 +; MIR: bb.0 (%ir-block.0): +; MIR-NEXT: liveins: $rdi +; MIR-NEXT: {{ $}} +; MIR-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdi +; MIR-NEXT: [[ADD64rm:%[0-9]+]]:gr64 = ADD64rm [[COPY]], %stack.0.loc, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (dereferenceable load (s64) from %ir.loc) +; MIR-NEXT: $rax = COPY [[ADD64rm]] +; MIR-NEXT: RET 0, $rax + %loc = alloca i64 + %j = load i64, ptr %loc + %r = add i64 %i, %j + ret i64 %r +} + +define i64 @test2(i32 %i) nounwind readnone { +; ASM-LABEL: test2: +; ASM: # %bb.0: +; ASM-NEXT: movl %edi, %eax +; ASM-NEXT: addl -{{[0-9]+}}(%rsp), %eax +; ASM-NEXT: retq +; MIR-LABEL: name: test2 +; MIR: bb.0 (%ir-block.0): +; MIR-NEXT: liveins: $edi +; MIR-NEXT: {{ $}} +; MIR-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi +; MIR-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[COPY]], %stack.0.loc, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (dereferenceable load (s32) from %ir.loc) +; MIR-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[ADD32rm]], %subreg.sub_32bit +; MIR-NEXT: $rax = COPY [[SUBREG_TO_REG]] +; MIR-NEXT: RET 0, $rax + %loc = alloca i32 + %j = load i32, ptr %loc + %r = add i32 %i, %j + %ext = zext i32 %r to i64 + ret i64 %ext +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll new file mode 100644 index 0000000..7167bcf --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECK + +define i32 @add(i32 %a, i32 %b) { + %sum = add i32 %a, %b + ret i32 %sum +} + +define i32 @sub(i32 %a, i32 %b) { + %diff = sub i32 %a, %b + ret i32 %diff +} + diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected new file mode 100644 index 0000000..1ba920d --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_asm_mir_same_prefix.ll.expected @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64 < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=x86_64 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECK + +define i32 @add(i32 %a, i32 %b) { + %sum = add i32 %a, %b + ret i32 %sum +} + +define i32 @sub(i32 %a, i32 %b) { + %diff = sub i32 %a, %b + ret i32 %diff +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test new file mode 100644 index 0000000..6fc57b5 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-mixed.test @@ -0,0 +1,9 @@ +# REQUIRES: x86-registered-target +## Test checking that update_llc_test_checks.py can generate both ASM and MIR checks in the same file + +# RUN: cp -f %S/Inputs/x86_asm_mir_mixed.ll %t.ll && %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/x86_asm_mir_mixed.ll.expected %t.ll + +## Verify that running the script again on an already updated file doesn't add duplicate checks +# RUN: %update_llc_test_checks %t.ll +# RUN: diff -u %S/Inputs/x86_asm_mir_mixed.ll.expected %t.ll diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test new file mode 100644 index 0000000..0f8aaa54 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/x86-asm-mir-same-prefix.test @@ -0,0 +1,8 @@ +# REQUIRES: x86-registered-target +## Test that using the same prefix for both ASM and MIR outputs generates a warning +## and doesn't produce any checks. + +# RUN: cp -f %S/Inputs/x86_asm_mir_same_prefix.ll %t.ll && %update_llc_test_checks %t.ll 2>&1 | FileCheck %s --check-prefix=WARNING +# RUN: diff -u %S/Inputs/x86_asm_mir_same_prefix.ll.expected %t.ll + +# WARNING: WARNING: The following prefixes are used for both ASM and MIR output, which will cause FileCheck failures: CHECK |
