diff options
Diffstat (limited to 'llvm/test')
124 files changed, 14700 insertions, 8852 deletions
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll index 8d091a0..d380104 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll @@ -61,7 +61,7 @@ define void @umin(i32 noundef %a, i32 noundef %b) { ; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umin (4 * %b))) ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646 ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umin (4 * %b))) -; CHECK-NEXT: Loop %for.body: Trip multiple is 1 +; CHECK-NEXT: Loop %for.body: Trip multiple is 2 ; ; void umin(unsigned a, unsigned b) { ; a *= 2; @@ -157,7 +157,7 @@ define void @smin(i32 noundef %a, i32 noundef %b) { ; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>)) ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646 ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>)) -; CHECK-NEXT: Loop %for.body: Trip multiple is 1 +; CHECK-NEXT: Loop %for.body: Trip multiple is 2 ; ; void smin(signed a, signed b) { ; a *= 2; diff --git a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll index b1fe7b1..7ba422d 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll @@ -615,22 +615,14 @@ define void @test_ptrs_aligned_by_4_via_assumption(ptr %start, ptr %end) { ; CHECK-LABEL: 'test_ptrs_aligned_by_4_via_assumption' ; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_4_via_assumption ; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 -; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_4_via_assumption -; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. -; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. -; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) -; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 -; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903 -; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 -; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) -; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 4) ] @@ -652,22 +644,14 @@ define void @test_ptrs_aligned_by_8_via_assumption(ptr %start, ptr %end) { ; CHECK-LABEL: 'test_ptrs_aligned_by_8_via_assumption' ; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_8_via_assumption ; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 -; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_8_via_assumption -; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. -; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. -; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) -; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 -; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903 -; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 -; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) -; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) +; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903 +; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) +; CHECK-NEXT: Loop %loop: Trip multiple is 1 ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ] diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir index 6362ed6..9381f0f4 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir @@ -1,11 +1,12 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s -# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s +# RUN: llc -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s +# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s ... --- name: fconstant_to_constant_s32 alignment: 4 tracksRegLiveness: true +legalized: true frameInfo: maxAlignment: 1 machineFunctionInfo: {} @@ -24,16 +25,17 @@ body: | ; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32)) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x0 - %3:_(s32) = G_FCONSTANT float 0x3FA99999A0000000 - %1:_(s64) = G_CONSTANT i64 524 - %2:_(p0) = G_PTR_ADD %0, %1(s64) - G_STORE %3(s32), %2(p0) :: (store (s32)) + %1:_(s32) = G_FCONSTANT float 0x3FA99999A0000000 + %2:_(s64) = G_CONSTANT i64 524 + %3:_(p0) = G_PTR_ADD %0, %2(s64) + G_STORE %1(s32), %3(p0) :: (store (s32)) RET_ReallyLR ... --- name: fconstant_to_constant_s64 alignment: 4 tracksRegLiveness: true +legalized: true frameInfo: maxAlignment: 1 machineFunctionInfo: {} @@ -48,7 +50,7 @@ body: | ; CHECK-NEXT: G_STORE %c(s64), %ptr(p0) :: (store (s64)) ; CHECK-NEXT: RET_ReallyLR %ptr:_(p0) = COPY $x0 - %c:_(s64) = G_FCONSTANT double 0.0 + %c:_(s64) = G_FCONSTANT double 0.000000e+00 G_STORE %c(s64), %ptr(p0) :: (store (s64)) RET_ReallyLR ... @@ -56,6 +58,7 @@ body: | name: no_store_means_no_combine alignment: 4 tracksRegLiveness: true +legalized: true frameInfo: maxAlignment: 1 machineFunctionInfo: {} @@ -71,7 +74,7 @@ body: | ; CHECK-NEXT: %add:_(s64) = G_FADD %v, %c ; CHECK-NEXT: RET_ReallyLR implicit %add(s64) %v:_(s64) = COPY $x0 - %c:_(s64) = G_FCONSTANT double 0.0 + %c:_(s64) = G_FCONSTANT double 0.000000e+00 %add:_(s64) = G_FADD %v, %c - RET_ReallyLR implicit %add + RET_ReallyLR implicit %add(s64) ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir index c301e76..c00ce22 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir @@ -48,8 +48,9 @@ body: | ; CHECK-NEXT: $w0 = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 ; CHECK-NEXT: $x0 = COPY [[C1]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $w0 = COPY [[C2]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) %0:_(s32) = G_FCONSTANT float 1.0 $w0 = COPY %0 %1:_(s64) = G_FCONSTANT double 2.0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir index ddf219d..c6df345 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir @@ -8,7 +8,7 @@ tracksRegLiveness: true body: | bb.0: ; NO-FP16-LABEL: name: fp16 - ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0 + ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000 ; NO-FP16-NEXT: $h0 = COPY %cst(s16) ; NO-FP16-NEXT: RET_ReallyLR implicit $h0 ; @@ -26,7 +26,7 @@ tracksRegLiveness: true body: | bb.0: ; NO-FP16-LABEL: name: fp16_non_zero - ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384 + ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000 ; NO-FP16-NEXT: $h0 = COPY %cst(s16) ; NO-FP16-NEXT: RET_ReallyLR implicit $h0 ; @@ -44,7 +44,7 @@ tracksRegLiveness: true body: | bb.1.entry: ; NO-FP16-LABEL: name: nan - ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745 + ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01 ; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16) ; NO-FP16-NEXT: $w0 = COPY %ext(s32) ; NO-FP16-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index d721b73c..896603d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -70,12 +70,12 @@ # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # -# DEBUG-NEXT: G_ABDS (opcode 65): 1 type index, 0 imm indices +# DEBUG-NEXT: G_ABDS (opcode [[G_ABDS:[0-9]+]]): 1 type index, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # -# DEBUG-NEXT: G_ABDU (opcode 66): 1 type index, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} +# DEBUG-NEXT: G_ABDU (opcode [[G_ABDU:[0-9]+]]): 1 type index, 0 imm indices +# DEBUG-NEXT: .. opcode [[G_ABDU]] is aliased to [[G_ABDS]] # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll index cb5df07..322a96a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -739,15 +739,14 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) { ; ; GISEL-LABEL: postidx32_shalf: ; GISEL: ; %bb.0: -; GISEL-NEXT: mov w8, #0 ; =0x0 -; GISEL-NEXT: ldr h1, [x0], #4 -; GISEL-NEXT: fmov s2, w8 +; GISEL-NEXT: movi d1, #0000000000000000 +; GISEL-NEXT: ldr h2, [x0], #4 ; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0 ; GISEL-NEXT: fmov w9, s0 -; GISEL-NEXT: fcvt s3, h1 -; GISEL-NEXT: fmov w8, s1 -; GISEL-NEXT: fcvt s2, h2 -; GISEL-NEXT: fcmp s3, s2 +; GISEL-NEXT: fcvt s3, h2 +; GISEL-NEXT: fmov w8, s2 +; GISEL-NEXT: fcvt s1, h1 +; GISEL-NEXT: fcmp s3, s1 ; GISEL-NEXT: csel w8, w8, w9, mi ; GISEL-NEXT: strh w8, [x1] ; GISEL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll index adc536d..b234ef7 100644 --- a/llvm/test/CodeGen/AArch64/f16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -782,18 +782,19 @@ define void @test_fccmp(half %in, ptr %out) { ; ; CHECK-CVT-GI-LABEL: test_fccmp: ; CHECK-CVT-GI: // %bb.0: -; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500 -; CHECK-CVT-GI-NEXT: mov w9, #18432 // =0x4800 +; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0 ; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0 ; CHECK-CVT-GI-NEXT: fcvt s2, h0 -; CHECK-CVT-GI-NEXT: fmov s1, w8 -; CHECK-CVT-GI-NEXT: fmov s3, w9 -; CHECK-CVT-GI-NEXT: fmov w9, s0 -; CHECK-CVT-GI-NEXT: fcvt s1, h1 -; CHECK-CVT-GI-NEXT: fcvt s3, h3 -; CHECK-CVT-GI-NEXT: fcmp s2, s1 -; CHECK-CVT-GI-NEXT: fccmp s2, s3, #4, mi -; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt +; CHECK-CVT-GI-NEXT: ldr h1, [x8, :lo12:.LCPI29_0] +; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_1 +; CHECK-CVT-GI-NEXT: ldr h4, [x8, :lo12:.LCPI29_1] +; CHECK-CVT-GI-NEXT: fmov w8, s0 +; CHECK-CVT-GI-NEXT: fcvt s3, h1 +; CHECK-CVT-GI-NEXT: fmov w9, s1 +; CHECK-CVT-GI-NEXT: fcvt s4, h4 +; CHECK-CVT-GI-NEXT: fcmp s2, s3 +; CHECK-CVT-GI-NEXT: fccmp s2, s4, #4, mi +; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt ; CHECK-CVT-GI-NEXT: strh w8, [x0] ; CHECK-CVT-GI-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll index 51aad4fe..7409bfb 100644 --- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll +++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll @@ -166,9 +166,9 @@ define i32 @fcvtzs_f16_i32_7(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI8_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI8_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -206,9 +206,9 @@ define i32 @fcvtzs_f16_i32_15(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI9_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI9_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -246,9 +246,9 @@ define i64 @fcvtzs_f16_i64_7(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI10_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI10_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -286,9 +286,9 @@ define i64 @fcvtzs_f16_i64_15(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI11_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI11_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -470,9 +470,9 @@ define i32 @fcvtzu_f16_i32_7(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI20_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI20_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -510,9 +510,9 @@ define i32 @fcvtzu_f16_i32_15(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI21_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI21_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -550,9 +550,9 @@ define i64 @fcvtzu_f16_i64_7(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI22_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI22_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -590,9 +590,9 @@ define i64 @fcvtzu_f16_i64_15(half %flt) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI23_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI23_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -775,10 +775,10 @@ define half @scvtf_f16_i32_7(i32 %int) { ; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: scvtf s0, w0 -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI32_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI32_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -815,10 +815,10 @@ define half @scvtf_f16_i32_15(i32 %int) { ; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: scvtf s0, w0 -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI33_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI33_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -855,10 +855,10 @@ define half @scvtf_f16_i64_7(i64 %long) { ; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: scvtf s0, x0 -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI34_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI34_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -895,10 +895,10 @@ define half @scvtf_f16_i64_15(i64 %long) { ; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: scvtf s0, x0 -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI35_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI35_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1079,10 +1079,10 @@ define half @ucvtf_f16_i32_7(i32 %int) { ; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: ucvtf s0, w0 -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI44_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI44_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1119,10 +1119,10 @@ define half @ucvtf_f16_i32_15(i32 %int) { ; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: ucvtf s0, w0 -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI45_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI45_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1159,10 +1159,10 @@ define half @ucvtf_f16_i64_7(i64 %long) { ; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: ucvtf s0, x0 -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI46_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI46_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1199,10 +1199,10 @@ define half @ucvtf_f16_i64_15(i64 %long) { ; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: ; CHECK-GI-NO16-NEXT: ucvtf s0, x0 -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 -; CHECK-GI-NO16-NEXT: fmov s1, w8 -; CHECK-GI-NO16-NEXT: fcvt h0, s0 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI47_0 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI47_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 +; CHECK-GI-NO16-NEXT: fcvt h0, s0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 ; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1373,9 +1373,9 @@ define i32 @fcvtzs_sat_f16_i32_7(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI55_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI55_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1413,9 +1413,9 @@ define i32 @fcvtzs_sat_f16_i32_15(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI56_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI56_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1453,9 +1453,9 @@ define i64 @fcvtzs_sat_f16_i64_7(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI57_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI57_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1493,9 +1493,9 @@ define i64 @fcvtzs_sat_f16_i64_15(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI58_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI58_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1667,9 +1667,9 @@ define i32 @fcvtzu_sat_f16_i32_7(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI66_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI66_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1707,9 +1707,9 @@ define i32 @fcvtzu_sat_f16_i32_15(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI67_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI67_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1747,9 +1747,9 @@ define i64 @fcvtzu_sat_f16_i64_7(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_7: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI68_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI68_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 @@ -1787,9 +1787,9 @@ define i64 @fcvtzu_sat_f16_i64_15(half %dbl) { ; ; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_15: ; CHECK-GI-NO16: // %bb.0: -; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800 +; CHECK-GI-NO16-NEXT: adrp x8, .LCPI69_0 ; CHECK-GI-NO16-NEXT: fcvt s0, h0 -; CHECK-GI-NO16-NEXT: fmov s1, w8 +; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI69_0] ; CHECK-GI-NO16-NEXT: fcvt s1, h1 ; CHECK-GI-NO16-NEXT: fmul s0, s0, s1 ; CHECK-GI-NO16-NEXT: fcvt h0, s0 diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll index 91bb8ac..9eacb61 100644 --- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll +++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll @@ -12,22 +12,14 @@ ; => ; recip = 1.0 / D; a * recip; b * recip; c * recip; define void @three_fdiv_float(float %D, float %a, float %b, float %c) { -; CHECK-SD-LABEL: three_fdiv_float: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fmov s4, #1.00000000 -; CHECK-SD-NEXT: fdiv s4, s4, s0 -; CHECK-SD-NEXT: fmul s0, s1, s4 -; CHECK-SD-NEXT: fmul s1, s2, s4 -; CHECK-SD-NEXT: fmul s2, s3, s4 -; CHECK-SD-NEXT: b foo_3f -; -; CHECK-GI-LABEL: three_fdiv_float: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fdiv s4, s1, s0 -; CHECK-GI-NEXT: fdiv s1, s2, s0 -; CHECK-GI-NEXT: fdiv s2, s3, s0 -; CHECK-GI-NEXT: fmov s0, s4 -; CHECK-GI-NEXT: b foo_3f +; CHECK-LABEL: three_fdiv_float: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov s4, #1.00000000 +; CHECK-NEXT: fdiv s4, s4, s0 +; CHECK-NEXT: fmul s0, s1, s4 +; CHECK-NEXT: fmul s1, s2, s4 +; CHECK-NEXT: fmul s2, s3, s4 +; CHECK-NEXT: b foo_3f %div = fdiv arcp float %a, %D %div1 = fdiv arcp float %b, %D %div2 = fdiv arcp float %c, %D @@ -36,22 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) { } define void @three_fdiv_double(double %D, double %a, double %b, double %c) { -; CHECK-SD-LABEL: three_fdiv_double: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fmov d4, #1.00000000 -; CHECK-SD-NEXT: fdiv d4, d4, d0 -; CHECK-SD-NEXT: fmul d0, d1, d4 -; CHECK-SD-NEXT: fmul d1, d2, d4 -; CHECK-SD-NEXT: fmul d2, d3, d4 -; CHECK-SD-NEXT: b foo_3d -; -; CHECK-GI-LABEL: three_fdiv_double: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fdiv d4, d1, d0 -; CHECK-GI-NEXT: fdiv d1, d2, d0 -; CHECK-GI-NEXT: fdiv d2, d3, d0 -; CHECK-GI-NEXT: fmov d0, d4 -; CHECK-GI-NEXT: b foo_3d +; CHECK-LABEL: three_fdiv_double: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d4, #1.00000000 +; CHECK-NEXT: fdiv d4, d4, d0 +; CHECK-NEXT: fmul d0, d1, d4 +; CHECK-NEXT: fmul d1, d2, d4 +; CHECK-NEXT: fmul d2, d3, d4 +; CHECK-NEXT: b foo_3d %div = fdiv arcp double %a, %D %div1 = fdiv arcp double %b, %D %div2 = fdiv arcp double %c, %D @@ -60,22 +44,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) { } define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) { -; CHECK-SD-LABEL: three_fdiv_4xfloat: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fmov v4.4s, #1.00000000 -; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s -; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s -; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s -; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s -; CHECK-SD-NEXT: b foo_3_4xf -; -; CHECK-GI-LABEL: three_fdiv_4xfloat: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fdiv v4.4s, v1.4s, v0.4s -; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v0.4s -; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v0.4s -; CHECK-GI-NEXT: mov v0.16b, v4.16b -; CHECK-GI-NEXT: b foo_3_4xf +; CHECK-LABEL: three_fdiv_4xfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov v4.4s, #1.00000000 +; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s +; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s +; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s +; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s +; CHECK-NEXT: b foo_3_4xf %div = fdiv arcp <4 x float> %a, %D %div1 = fdiv arcp <4 x float> %b, %D %div2 = fdiv arcp <4 x float> %c, %D @@ -84,22 +60,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, } define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) { -; CHECK-SD-LABEL: three_fdiv_2xdouble: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fmov v4.2d, #1.00000000 -; CHECK-SD-NEXT: fdiv v4.2d, v4.2d, v0.2d -; CHECK-SD-NEXT: fmul v0.2d, v1.2d, v4.2d -; CHECK-SD-NEXT: fmul v1.2d, v2.2d, v4.2d -; CHECK-SD-NEXT: fmul v2.2d, v3.2d, v4.2d -; CHECK-SD-NEXT: b foo_3_2xd -; -; CHECK-GI-LABEL: three_fdiv_2xdouble: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fdiv v4.2d, v1.2d, v0.2d -; CHECK-GI-NEXT: fdiv v1.2d, v2.2d, v0.2d -; CHECK-GI-NEXT: fdiv v2.2d, v3.2d, v0.2d -; CHECK-GI-NEXT: mov v0.16b, v4.16b -; CHECK-GI-NEXT: b foo_3_2xd +; CHECK-LABEL: three_fdiv_2xdouble: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov v4.2d, #1.00000000 +; CHECK-NEXT: fdiv v4.2d, v4.2d, v0.2d +; CHECK-NEXT: fmul v0.2d, v1.2d, v4.2d +; CHECK-NEXT: fmul v1.2d, v2.2d, v4.2d +; CHECK-NEXT: fmul v2.2d, v3.2d, v4.2d +; CHECK-NEXT: b foo_3_2xd %div = fdiv arcp <2 x double> %a, %D %div1 = fdiv arcp <2 x double> %b, %D %div2 = fdiv arcp <2 x double> %c, %D @@ -135,26 +103,47 @@ define void @two_fdiv_double(double %D, double %a, double %b) { ret void } -define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) { -; CHECK-SD-LABEL: splat_three_fdiv_4xfloat: +define void @four_fdiv_multi_float(float %D, float %a, float %b, float %c) #0 { +; CHECK-SD-LABEL: four_fdiv_multi_float: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-SD-NEXT: fmov v4.4s, #1.00000000 -; CHECK-SD-NEXT: dup v0.4s, v0.s[0] -; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s -; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s -; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s -; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s -; CHECK-SD-NEXT: b foo_3_4xf +; CHECK-SD-NEXT: fmov s4, #1.00000000 +; CHECK-SD-NEXT: fdiv s5, s4, s0 +; CHECK-SD-NEXT: fmul s4, s1, s5 +; CHECK-SD-NEXT: fmul s1, s2, s5 +; CHECK-SD-NEXT: fmul s2, s3, s5 +; CHECK-SD-NEXT: fmul s3, s0, s5 +; CHECK-SD-NEXT: fmov s0, s4 +; CHECK-SD-NEXT: b foo_4f ; -; CHECK-GI-LABEL: splat_three_fdiv_4xfloat: +; CHECK-GI-LABEL: four_fdiv_multi_float: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-GI-NEXT: dup v4.4s, v0.s[0] -; CHECK-GI-NEXT: fdiv v0.4s, v1.4s, v4.4s -; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v4.4s -; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v4.4s -; CHECK-GI-NEXT: b foo_3_4xf +; CHECK-GI-NEXT: fmov s4, #1.00000000 +; CHECK-GI-NEXT: fdiv s5, s4, s0 +; CHECK-GI-NEXT: fdiv s4, s0, s0 +; CHECK-GI-NEXT: fmul s0, s1, s5 +; CHECK-GI-NEXT: fmul s1, s2, s5 +; CHECK-GI-NEXT: fmul s2, s3, s5 +; CHECK-GI-NEXT: fmov s3, s4 +; CHECK-GI-NEXT: b foo_4f + %div = fdiv arcp float %a, %D + %div1 = fdiv arcp float %b, %D + %div2 = fdiv arcp float %c, %D + %div3 = fdiv arcp float %D, %D + tail call void @foo_4f(float %div, float %div1, float %div2, float %div3) + ret void +} + +define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: splat_three_fdiv_4xfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: fmov v4.4s, #1.00000000 +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s +; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s +; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s +; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s +; CHECK-NEXT: b foo_3_4xf %D.ins = insertelement <4 x float> poison, float %D, i64 0 %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer %div = fdiv arcp <4 x float> %a, %splat @@ -256,6 +245,7 @@ entry: } declare void @foo_3f(float, float, float) +declare void @foo_4f(float, float, float, float) declare void @foo_3d(double, double, double) declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>) declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>) diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll index 594a3ab..be07978 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll @@ -38,10 +38,10 @@ define half @add_v2HalfH(<2 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_v2HalfH: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0] ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[1] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 @@ -88,10 +88,10 @@ define half @add_v3HalfH(<3 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_v3HalfH: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -152,10 +152,10 @@ define half @add_HalfH(<4 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_HalfH: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI3_0 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI3_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -250,9 +250,9 @@ define half @add_H(<8 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_H: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI4_0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI4_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -448,9 +448,9 @@ define half @add_2H(<16 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: add_2H: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI7_0 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: fmov s2, w8 +; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI7_0] ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 ; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll index 18f463c..40925da 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll @@ -405,26 +405,23 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) { ; ; CHECK-GI-NOFP16-LABEL: fadd_reduction_v4f16_in_loop: ; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000 ; CHECK-GI-NOFP16-NEXT: mov x8, xzr -; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0 ; CHECK-GI-NOFP16-NEXT: .LBB13_1: // %loop ; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-GI-NOFP16-NEXT: ldr d0, [x0, x8] -; CHECK-GI-NOFP16-NEXT: fmov s1, w9 +; CHECK-GI-NOFP16-NEXT: ldr d1, [x0, x8] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 ; CHECK-GI-NOFP16-NEXT: add x8, x8, #8 ; CHECK-GI-NOFP16-NEXT: cmp w8, #56 -; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s +; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fmov w9, s0 ; CHECK-GI-NOFP16-NEXT: b.ne .LBB13_1 ; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit -; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fadd_reduction_v4f16_in_loop: @@ -521,28 +518,25 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) { ; ; CHECK-GI-NOFP16-LABEL: fadd_reduction_v8f16_in_loop: ; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000 ; CHECK-GI-NOFP16-NEXT: mov x8, xzr -; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0 ; CHECK-GI-NOFP16-NEXT: .LBB14_1: // %loop ; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-GI-NOFP16-NEXT: ldr q0, [x0, x8] +; CHECK-GI-NOFP16-NEXT: ldr q1, [x0, x8] +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 ; CHECK-GI-NOFP16-NEXT: add x8, x8, #8 ; CHECK-GI-NOFP16-NEXT: cmp w8, #56 -; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h -; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: fmov s1, w9 +; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v1.4h +; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s +; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s +; CHECK-GI-NOFP16-NEXT: fcvt h1, s1 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 -; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s -; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s -; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 -; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1 +; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0 -; CHECK-GI-NOFP16-NEXT: fmov w9, s0 ; CHECK-GI-NOFP16-NEXT: b.ne .LBB14_1 ; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit -; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0 ; CHECK-GI-NOFP16-NEXT: ret ; ; CHECK-GI-FP16-LABEL: fadd_reduction_v8f16_in_loop: diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll index e1b2170..c10d6e9 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll @@ -52,10 +52,10 @@ define half @mul_HalfH(<4 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: mul_HalfH: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -144,9 +144,9 @@ define half @mul_H(<8 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: mul_H: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0 -; CHECK-GI-NOFP16-NEXT: fmov s1, w8 +; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0] ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1] @@ -321,9 +321,9 @@ define half @mul_2H(<16 x half> %bin.rdx) { ; ; CHECK-GI-NOFP16-LABEL: mul_2H: ; CHECK-GI-NOFP16: // %bb.0: -; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00 +; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI5_0 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h0 -; CHECK-GI-NOFP16-NEXT: fmov s2, w8 +; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI5_0] ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2 ; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1] diff --git a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll index 7cc5051..003aa04 100644 --- a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll @@ -8759,9 +8759,8 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 { ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6 ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc -; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3] -; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] @@ -8780,20 +8779,19 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 { ; GFX90A-NEXT: s_cbranch_execz .LBB113_6 ; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private ; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] -; GFX90A-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc -; GFX90A-NEXT: buffer_load_dword v0, v4, s[0:3], 0 offen -; GFX90A-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen offset:4 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v4, vcc +; GFX90A-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen +; GFX90A-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4 ; GFX90A-NEXT: s_waitcnt vmcnt(1) -; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6 +; GFX90A-NEXT: v_sub_co_u32_e32 v3, vcc, v1, v6 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc -; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] -; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 -; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc -; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1 -; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc -; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen offset:4 -; GFX90A-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen +; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v7, vcc +; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1 +; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2 +; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc +; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen +; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4 ; GFX90A-NEXT: .LBB113_6: ; %atomicrmw.phi ; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX90A-NEXT: ;;#ASMSTART @@ -8827,10 +8825,9 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 { ; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc -; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] sc0 ; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] @@ -8856,11 +8853,11 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 { ; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc -; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] ; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 -; GFX950-NEXT: v_accvgpr_write_b32 a1, v1 +; GFX950-NEXT: s_nop 0 ; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc ; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX950-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX950-NEXT: scratch_store_dwordx2 v4, v[2:3], off ; GFX950-NEXT: .LBB113_6: ; %atomicrmw.phi ; GFX950-NEXT: s_or_b64 exec, exec, s[0:1] @@ -8900,9 +8897,8 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 { ; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1] ; GFX90A-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2 ; GFX90A-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc -; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7] -; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc ; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] @@ -8918,18 +8914,17 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 { ; GFX90A-NEXT: s_cbranch_execz .LBB114_6 ; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private ; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GFX90A-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc -; GFX90A-NEXT: buffer_load_dword v4, v6, s[0:3], 0 offen -; GFX90A-NEXT: buffer_load_dword v5, v6, s[0:3], 0 offen offset:4 +; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX90A-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen +; GFX90A-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:4 ; GFX90A-NEXT: s_waitcnt vmcnt(1) -; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v4, v2 +; GFX90A-NEXT: v_sub_co_u32_e32 v1, vcc, v4, v2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v5, v3, vcc -; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[4:5] -; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX90A-NEXT: v_subb_co_u32_e32 v2, vcc, v5, v3, vcc ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc -; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen -; GFX90A-NEXT: buffer_store_dword v1, v6, s[0:3], 0 offen offset:4 +; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen +; GFX90A-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 ; GFX90A-NEXT: .LBB114_6: ; %atomicrmw.phi ; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX90A-NEXT: ;;#ASMSTART @@ -8962,10 +8957,9 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 { ; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc -; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc ; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc ; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0 ; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] @@ -8988,7 +8982,6 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 { ; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc -; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3] ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc @@ -17064,9 +17057,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 { ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4 ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc -; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3] -; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] @@ -17085,20 +17077,19 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 { ; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private ; GFX90A-NEXT: s_cmp_lg_u64 s[4:5], 0 ; GFX90A-NEXT: s_cselect_b32 s4, s4, -1 -; GFX90A-NEXT: v_mov_b32_e32 v6, s4 -; GFX90A-NEXT: buffer_load_dword v0, v6, s[0:3], 0 offen -; GFX90A-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen offset:4 +; GFX90A-NEXT: v_mov_b32_e32 v0, s4 +; GFX90A-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen +; GFX90A-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4 ; GFX90A-NEXT: s_waitcnt vmcnt(1) -; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v4 +; GFX90A-NEXT: v_sub_co_u32_e32 v3, vcc, v1, v4 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc -; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] -; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0 -; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc -; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1 -; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc -; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen offset:4 -; GFX90A-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen +; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v5, vcc +; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1 +; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc +; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2 +; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc +; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen +; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4 ; GFX90A-NEXT: .LBB221_6: ; %atomicrmw.phi ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use a[0:1] @@ -17131,10 +17122,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 { ; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc -; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0 ; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] @@ -17158,11 +17148,11 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 { ; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v4 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc -; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] ; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 -; GFX950-NEXT: v_accvgpr_write_b32 a1, v1 +; GFX950-NEXT: s_nop 0 ; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc ; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX950-NEXT: v_accvgpr_write_b32 a1, v1 ; GFX950-NEXT: scratch_store_dwordx2 off, v[2:3], s0 ; GFX950-NEXT: .LBB221_6: ; %atomicrmw.phi ; GFX950-NEXT: ;;#ASMSTART @@ -17201,9 +17191,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 { ; GFX90A-NEXT: v_pk_mov_b32 v[8:9], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0 ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc -; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9] -; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc ; GFX90A-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc +; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc ; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] @@ -17226,7 +17215,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 { ; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc -; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3] ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc ; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen @@ -17262,10 +17250,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 { ; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc -; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc ; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc ; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0 ; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] @@ -17286,7 +17273,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 { ; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc -; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3] ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc diff --git a/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll index c98fff9..34a4899 100644 --- a/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll @@ -5804,9 +5804,8 @@ define void @global_atomic_usub_sat_i64_ret_a_a(ptr addrspace(1) %ptr) #0 { ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v4, v6 ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v5, v7, vcc -; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[4:5] -; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc ; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off offset:80 glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] @@ -5839,10 +5838,9 @@ define void @global_atomic_usub_sat_i64_ret_a_a(ptr addrspace(1) %ptr) #0 { ; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v4, v6 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v5, v7, vcc -; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[4:5] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc ; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc ; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off offset:80 sc0 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5] @@ -5880,9 +5878,8 @@ define void @global_atomic_usub_sat_i64_ret_av_av(ptr addrspace(1) %ptr) #0 { ; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1] ; GFX90A-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2 ; GFX90A-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc -; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7] -; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc ; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:80 glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] @@ -5911,10 +5908,9 @@ define void @global_atomic_usub_sat_i64_ret_av_av(ptr addrspace(1) %ptr) #0 { ; GFX950-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc -; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc ; GFX950-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc ; GFX950-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:80 sc0 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] @@ -11573,9 +11569,8 @@ define void @global_atomic_usub_sat_i64_saddr_ret_a_a(ptr addrspace(1) inreg %pt ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4 ; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc -; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3] -; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[16:17] offset:80 glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] @@ -11609,10 +11604,9 @@ define void @global_atomic_usub_sat_i64_saddr_ret_a_a(ptr addrspace(1) inreg %pt ; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc -; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX950-NEXT: global_atomic_cmpswap_x2 v[0:1], v6, v[0:3], s[0:1] offset:80 sc0 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] @@ -11651,9 +11645,8 @@ define void @global_atomic_usub_sat_i64_saddr_ret_av_av(ptr addrspace(1) inreg % ; GFX90A-NEXT: v_pk_mov_b32 v[8:9], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0 ; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc -; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9] -; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc ; GFX90A-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc +; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc ; GFX90A-NEXT: global_atomic_cmpswap_x2 v[2:3], v4, v[6:9], s[16:17] offset:80 glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] @@ -11683,10 +11676,9 @@ define void @global_atomic_usub_sat_i64_saddr_ret_av_av(ptr addrspace(1) inreg % ; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc -; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc ; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc ; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v4, v[6:9], s[0:1] offset:80 sc0 ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] diff --git a/llvm/test/CodeGen/AMDGPU/addsub64_carry.ll b/llvm/test/CodeGen/AMDGPU/addsub64_carry.ll index d326966..b72eba8 100644 --- a/llvm/test/CodeGen/AMDGPU/addsub64_carry.ll +++ b/llvm/test/CodeGen/AMDGPU/addsub64_carry.ll @@ -17,12 +17,9 @@ define %struct.uint96 @v_add64_32(i64 %val64A, i64 %val64B, i32 %val32) { ; CHECK-LABEL: v_add64_32: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_add_co_u32_e32 v5, vcc, v0, v2 -; CHECK-NEXT: v_addc_co_u32_e32 v6, vcc, v1, v3, vcc -; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, v[5:6], v[0:1] -; CHECK-NEXT: v_mov_b32_e32 v0, v5 +; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc ; CHECK-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v4, vcc -; CHECK-NEXT: v_mov_b32_e32 v1, v6 ; CHECK-NEXT: s_setpc_b64 s[30:31] %sum64 = add i64 %val64A, %val64B %obit = icmp ult i64 %sum64, %val64A @@ -38,16 +35,14 @@ define <2 x i64> @v_uadd_v2i64(<2 x i64> %val0, <2 x i64> %val1, ptr %ptrval) { ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, v2, v6 +; CHECK-NEXT: v_add_co_u32_e64 v4, s[4:5], v0, v4 ; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v3, v7, vcc -; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, v0, v4 -; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v5, vcc -; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, v[4:5], v[0:1] -; CHECK-NEXT: flat_store_dwordx4 v[8:9], v[4:7] -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, v[6:7], v[2:3] -; CHECK-NEXT: v_mov_b32_e32 v1, v0 +; CHECK-NEXT: v_addc_co_u32_e64 v5, s[4:5], v1, v5, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5] ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; CHECK-NEXT: v_mov_b32_e32 v1, v0 ; CHECK-NEXT: v_mov_b32_e32 v3, v2 +; CHECK-NEXT: flat_store_dwordx4 v[8:9], v[4:7] ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] %pair = call {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x i64> %val0, <2 x i64> %val1) @@ -63,16 +58,14 @@ define <2 x i64> @v_usub_v2i64(<2 x i64> %val0, <2 x i64> %val1, ptr %ptrval) { ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_sub_co_u32_e32 v6, vcc, v2, v6 +; CHECK-NEXT: v_sub_co_u32_e64 v4, s[4:5], v0, v4 ; CHECK-NEXT: v_subb_co_u32_e32 v7, vcc, v3, v7, vcc -; CHECK-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v4 -; CHECK-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v5, vcc -; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[0:1] -; CHECK-NEXT: flat_store_dwordx4 v[8:9], v[4:7] -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3] -; CHECK-NEXT: v_mov_b32_e32 v1, v0 +; CHECK-NEXT: v_subb_co_u32_e64 v5, s[4:5], v1, v5, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5] ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; CHECK-NEXT: v_mov_b32_e32 v1, v0 ; CHECK-NEXT: v_mov_b32_e32 v3, v2 +; CHECK-NEXT: flat_store_dwordx4 v[8:9], v[4:7] ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] %pair = call {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x i64> %val0, <2 x i64> %val1) @@ -87,10 +80,9 @@ define i64 @v_uadd_i64(i64 %val0, i64 %val1, ptr %ptrval) { ; CHECK-LABEL: v_uadd_i64: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2 -; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v3, vcc -; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1] -; CHECK-NEXT: flat_store_dwordx2 v[4:5], v[2:3] +; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: flat_store_dwordx2 v[4:5], v[0:1] ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_mov_b32_e32 v1, v0 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -109,7 +101,6 @@ define i64 @v_uadd_p1(i64 %val0, i64 %val1, ptr %ptrval) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 ; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] ; CHECK-NEXT: flat_store_dwordx2 v[4:5], v[0:1] ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_mov_b32_e32 v1, v0 @@ -147,10 +138,9 @@ define i64 @v_usub_p1(i64 %val0, i64 %val1, ptr %ptrval) { ; CHECK-LABEL: v_usub_p1: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, -1, v0 -; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v1, vcc -; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] -; CHECK-NEXT: flat_store_dwordx2 v[4:5], v[2:3] +; CHECK-NEXT: v_subrev_co_u32_e32 v0, vcc, 1, v0 +; CHECK-NEXT: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: flat_store_dwordx2 v[4:5], v[0:1] ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_mov_b32_e32 v1, v0 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -167,10 +157,9 @@ define i64 @v_usub_n1(i64 %val0, i64 %val1, ptr %ptrval) { ; CHECK-LABEL: v_usub_n1: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, 1, v0 -; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] -; CHECK-NEXT: flat_store_dwordx2 v[4:5], v[2:3] +; CHECK-NEXT: v_subrev_co_u32_e32 v0, vcc, -1, v0 +; CHECK-NEXT: v_subbrev_co_u32_e32 v1, vcc, -1, v1, vcc +; CHECK-NEXT: flat_store_dwordx2 v[4:5], v[0:1] ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc ; CHECK-NEXT: v_mov_b32_e32 v1, v0 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -190,15 +179,13 @@ define i64 @v_usub_n1(i64 %val0, i64 %val1, ptr %ptrval) { define amdgpu_ps %struct.uint96 @s_add64_32(i64 inreg %val64A, i64 inreg %val64B, i32 inreg %val32) { ; CHECK-LABEL: s_add64_32: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_add_u32 s6, s0, s2 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: s_addc_u32 s7, s1, s3 -; CHECK-NEXT: v_mov_b32_e32 v1, s1 -; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1] -; CHECK-NEXT: s_mov_b32 s0, s6 -; CHECK-NEXT: s_cmp_lg_u64 vcc, 0 +; CHECK-NEXT: s_add_u32 s0, s0, s2 +; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0 +; CHECK-NEXT: s_cmp_lg_u64 s[6:7], 0 +; CHECK-NEXT: s_addc_u32 s1, s1, s3 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0 ; CHECK-NEXT: s_addc_u32 s2, s4, 0 -; CHECK-NEXT: s_mov_b32 s1, s7 ; CHECK-NEXT: ; return to shader part epilog %sum64 = add i64 %val64A, %val64B %obit = icmp ult i64 %sum64, %val64A @@ -212,24 +199,24 @@ define amdgpu_ps %struct.uint96 @s_add64_32(i64 inreg %val64A, i64 inreg %val64B define amdgpu_ps <2 x i64> @s_uadd_v2i64(<2 x i64> inreg %val0, <2 x i64> inreg %val1, ptr %ptrval) { ; CHECK-LABEL: s_uadd_v2i64: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_add_u32 s6, s2, s6 -; CHECK-NEXT: v_mov_b32_e32 v9, s3 -; CHECK-NEXT: s_addc_u32 s7, s3, s7 -; CHECK-NEXT: v_mov_b32_e32 v8, s2 -; CHECK-NEXT: s_add_u32 s4, s0, s4 -; CHECK-NEXT: v_mov_b32_e32 v7, s1 -; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9] -; CHECK-NEXT: s_addc_u32 s5, s1, s5 -; CHECK-NEXT: v_mov_b32_e32 v6, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[6:7] -; CHECK-NEXT: v_readfirstlane_b32 s2, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CHECK-NEXT: v_readfirstlane_b32 s0, v6 -; CHECK-NEXT: v_mov_b32_e32 v2, s4 -; CHECK-NEXT: v_mov_b32_e32 v3, s5 -; CHECK-NEXT: v_mov_b32_e32 v4, s6 -; CHECK-NEXT: v_mov_b32_e32 v5, s7 +; CHECK-NEXT: s_add_u32 s10, s2, s6 +; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0 +; CHECK-NEXT: s_cmp_lg_u64 s[8:9], 0 +; CHECK-NEXT: s_addc_u32 s8, s3, s7 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: s_add_u32 s0, s0, s4 +; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0 +; CHECK-NEXT: s_cmp_lg_u64 s[6:7], 0 +; CHECK-NEXT: s_addc_u32 s1, s1, s5 +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[2:3] +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v7 +; CHECK-NEXT: v_readfirstlane_b32 s2, v6 +; CHECK-NEXT: v_mov_b32_e32 v4, s10 +; CHECK-NEXT: v_mov_b32_e32 v5, s8 ; CHECK-NEXT: s_mov_b32 s1, s0 ; CHECK-NEXT: s_mov_b32 s3, s2 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] @@ -246,24 +233,24 @@ define amdgpu_ps <2 x i64> @s_uadd_v2i64(<2 x i64> inreg %val0, <2 x i64> inreg define amdgpu_ps <2 x i64> @s_usub_v2i64(<2 x i64> inreg %val0, <2 x i64> inreg %val1, ptr %ptrval) { ; CHECK-LABEL: s_usub_v2i64: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_sub_u32 s6, s2, s6 -; CHECK-NEXT: v_mov_b32_e32 v9, s3 -; CHECK-NEXT: s_subb_u32 s7, s3, s7 -; CHECK-NEXT: v_mov_b32_e32 v8, s2 -; CHECK-NEXT: s_sub_u32 s4, s0, s4 -; CHECK-NEXT: v_mov_b32_e32 v7, s1 -; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, s[6:7], v[8:9] -; CHECK-NEXT: s_subb_u32 s5, s1, s5 -; CHECK-NEXT: v_mov_b32_e32 v6, s0 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[6:7] -; CHECK-NEXT: v_readfirstlane_b32 s2, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CHECK-NEXT: v_readfirstlane_b32 s0, v6 -; CHECK-NEXT: v_mov_b32_e32 v2, s4 -; CHECK-NEXT: v_mov_b32_e32 v3, s5 -; CHECK-NEXT: v_mov_b32_e32 v4, s6 -; CHECK-NEXT: v_mov_b32_e32 v5, s7 +; CHECK-NEXT: s_sub_u32 s10, s2, s6 +; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0 +; CHECK-NEXT: s_cmp_lg_u64 s[8:9], 0 +; CHECK-NEXT: s_subb_u32 s8, s3, s7 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: s_sub_u32 s0, s0, s4 +; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0 +; CHECK-NEXT: s_cmp_lg_u64 s[6:7], 0 +; CHECK-NEXT: s_subb_u32 s1, s1, s5 +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[2:3] +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[0:1] +; CHECK-NEXT: v_readfirstlane_b32 s0, v7 +; CHECK-NEXT: v_readfirstlane_b32 s2, v6 +; CHECK-NEXT: v_mov_b32_e32 v4, s10 +; CHECK-NEXT: v_mov_b32_e32 v5, s8 ; CHECK-NEXT: s_mov_b32 s1, s0 ; CHECK-NEXT: s_mov_b32 s3, s2 ; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] @@ -280,15 +267,15 @@ define amdgpu_ps <2 x i64> @s_usub_v2i64(<2 x i64> inreg %val0, <2 x i64> inreg define amdgpu_ps i64 @s_uadd_i64(i64 inreg %val0, i64 inreg %val1, ptr %ptrval) { ; CHECK-LABEL: s_uadd_i64: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_add_u32 s2, s0, s2 -; CHECK-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-NEXT: s_addc_u32 s3, s1, s3 +; CHECK-NEXT: s_add_u32 s0, s0, s2 +; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0 +; CHECK-NEXT: s_cmp_lg_u64 s[4:5], 0 +; CHECK-NEXT: s_addc_u32 s1, s1, s3 ; CHECK-NEXT: v_mov_b32_e32 v2, s0 -; CHECK-NEXT: v_mov_b32_e32 v5, s3 -; CHECK-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3] -; CHECK-NEXT: v_mov_b32_e32 v4, s2 -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CHECK-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[0:1] ; CHECK-NEXT: v_readfirstlane_b32 s0, v0 ; CHECK-NEXT: s_mov_b32 s1, s0 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -305,10 +292,11 @@ define amdgpu_ps i64 @s_uadd_p1(i64 inreg %val0, i64 inreg %val1, ptr %ptrval) { ; CHECK-LABEL: s_uadd_p1: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_add_u32 s0, s0, 1 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 -; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0 -; CHECK-NEXT: v_mov_b32_e32 v3, s1 ; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: v_mov_b32_e32 v3, s1 ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 ; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[0:1] @@ -350,15 +338,15 @@ define amdgpu_ps i64 @s_uadd_n1(i64 inreg %val0, i64 inreg %val1, ptr %ptrval) { define amdgpu_ps i64 @s_usub_p1(i64 inreg %val0, i64 inreg %val1, ptr %ptrval) { ; CHECK-LABEL: s_usub_p1: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_add_u32 s2, s0, -1 -; CHECK-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-NEXT: s_addc_u32 s3, s1, -1 +; CHECK-NEXT: s_sub_u32 s0, s0, 1 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0 +; CHECK-NEXT: s_subb_u32 s1, s1, 0 ; CHECK-NEXT: v_mov_b32_e32 v2, s0 -; CHECK-NEXT: v_mov_b32_e32 v5, s3 -; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, s[2:3], v[2:3] -; CHECK-NEXT: v_mov_b32_e32 v4, s2 -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CHECK-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[0:1] ; CHECK-NEXT: v_readfirstlane_b32 s0, v0 ; CHECK-NEXT: s_mov_b32 s1, s0 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -374,15 +362,15 @@ define amdgpu_ps i64 @s_usub_p1(i64 inreg %val0, i64 inreg %val1, ptr %ptrval) { define amdgpu_ps i64 @s_usub_n1(i64 inreg %val0, i64 inreg %val1, ptr %ptrval) { ; CHECK-LABEL: s_usub_n1: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_add_u32 s2, s0, 1 -; CHECK-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-NEXT: s_addc_u32 s3, s1, 0 +; CHECK-NEXT: s_sub_u32 s0, s0, -1 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0 +; CHECK-NEXT: s_subb_u32 s1, s1, -1 ; CHECK-NEXT: v_mov_b32_e32 v2, s0 -; CHECK-NEXT: v_mov_b32_e32 v5, s3 -; CHECK-NEXT: v_cmp_gt_u64_e32 vcc, s[2:3], v[2:3] -; CHECK-NEXT: v_mov_b32_e32 v4, s2 -; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CHECK-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[0:1] ; CHECK-NEXT: v_readfirstlane_b32 s0, v0 ; CHECK-NEXT: s_mov_b32 s1, s0 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll new file mode 100644 index 0000000..f730199 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll @@ -0,0 +1,1066 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 4 +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s + +; Shrink result attribute list by preventing use of most attributes. +define internal void @use_most() { +; CHECK-LABEL: define internal void @use_most( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [256 x i8], align 1, addrspace(5) +; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x() +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y() +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z() +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.cluster.id.x() +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.cluster.id.y() +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.cluster.id.z() +; CHECK-NEXT: [[TMP7:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +; CHECK-NEXT: [[TMP8:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr() +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.dispatch.id() +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[IMPLICIT_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr [[ALLOCA_CAST]], ptr addrspace(4) [[IMPLICIT_ARG_PTR]], i64 256, i1 false) +; CHECK-NEXT: ret void +; + %alloca = alloca [256 x i8], addrspace(5) + %alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr + call i32 @llvm.amdgcn.workitem.id.x() + call i32 @llvm.amdgcn.workitem.id.y() + call i32 @llvm.amdgcn.workitem.id.z() + call i32 @llvm.amdgcn.workgroup.id.x() + call i32 @llvm.amdgcn.workgroup.id.y() + call i32 @llvm.amdgcn.workgroup.id.z() + call i32 @llvm.amdgcn.cluster.id.x() + call i32 @llvm.amdgcn.cluster.id.y() + call i32 @llvm.amdgcn.cluster.id.z() + call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() + call ptr addrspace(4) @llvm.amdgcn.queue.ptr() + call i64 @llvm.amdgcn.dispatch.id() + call i32 @llvm.amdgcn.lds.kernel.id() + %implicit.arg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + call void @llvm.memcpy.p0.p4(ptr %alloca.cast, ptr addrspace(4) %implicit.arg.ptr, i64 256, i1 false) + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_virtreg() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "a"(i32 poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_virtreg_def() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: [[DEF:%.*]] = call i32 asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %def = call i32 asm sideeffect "; def $0", "=a"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple( +; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[DEF:%.*]] = call i64 asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %def = call i64 asm sideeffect "; def $0", "={a[0:1]}"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "v,a"(i32 poison, i32 poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_non_agpr_asm() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_non_agpr_asm( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "v"(i32 poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_physreg() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "{a0}"(i32 poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_tuple( +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison) + call void @use_most() + ret void +} + +define void @func_uses_asm_virtreg_agpr() { +; CHECK-LABEL: define void @func_uses_asm_virtreg_agpr( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "a"(i32 poison) + call void @use_most() + ret void +} + +define void @func_uses_asm_physreg_agpr() { +; CHECK-LABEL: define void @func_uses_asm_physreg_agpr( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "{a0}"(i32 poison) + call void @use_most() + ret void +} + +define void @func_uses_asm_physreg_agpr_tuple() { +; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple( +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison) + call void @use_most() + ret void +} + +declare void @unknown() + +define amdgpu_kernel void @kernel_calls_extern() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern( +; CHECK-SAME: ) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void @unknown() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_calls_extern_marked_callsite() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite( +; CHECK-SAME: ) #[[ATTR3]] { +; CHECK-NEXT: call void @unknown() #[[ATTR29:[0-9]+]] +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void @unknown() #0 + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect( +; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: call void [[INDIRECT]]() +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void %indirect() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite( +; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR29]] +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void %indirect() #0 + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: call void @func_uses_asm_physreg_agpr() +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void @func_uses_asm_physreg_agpr() + call void @use_most() + ret void +} + +define void @empty() { +; CHECK-LABEL: define void @empty( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void @use_most() + ret void +} + +define void @also_empty() { +; CHECK-LABEL: define void @also_empty( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_calls_empty() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_empty( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: call void @empty() +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void @empty() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: call void @empty() +; CHECK-NEXT: call void @func_uses_asm_physreg_agpr() +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void @empty() + call void @func_uses_asm_physreg_agpr() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_calls_generic_intrinsic(ptr %ptr0, ptr %ptr1, i64 %size) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_generic_intrinsic( +; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[PTR0]], ptr [[PTR1]], i64 [[SIZE]], i1 false) +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void @llvm.memcpy.p0.p0.i64(ptr %ptr0, ptr %ptr1, i64 %size, i1 false) + call void @use_most() + ret void +} + +declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32 immarg, i32 immarg, i32 immarg) + +define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(ptr addrspace(1) %out, float %a, float %b, <32 x float> %c) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32( +; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RESULT:%.*]] = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float [[A]], float [[B]], <32 x float> [[C]], i32 0, i32 0, i32 0) +; CHECK-NEXT: store <32 x float> [[RESULT]], ptr addrspace(1) [[OUT]], align 128 +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %result = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0) + store <32 x float> %result, ptr addrspace(1) %out + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_workitem_id_x( +; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: store i32 [[RESULT]], ptr addrspace(1) [[OUT]], align 4 +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %result = call i32 @llvm.amdgcn.workitem.id.x() + store i32 %result, ptr addrspace(1) %out + call void @use_most() + ret void +} + +define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) { +; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr( +; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] +; CHECK: 2: +; CHECK-NEXT: call void @also_empty() +; CHECK-NEXT: br label [[TMP6:%.*]] +; CHECK: 3: +; CHECK-NEXT: br i1 true, label [[TMP4:%.*]], label [[TMP5:%.*]] +; CHECK: 4: +; CHECK-NEXT: call void @empty() +; CHECK-NEXT: br label [[TMP6]] +; CHECK: 5: +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %fptr = select i1 %cond, ptr @empty, ptr @also_empty + call void %fptr() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0( +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %def = call {i32, i32} asm sideeffect "; def $0", "=a,=a"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1( +; CHECK-SAME: ) #[[ATTR5:[0-9]+]] { +; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=a"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=v"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty( +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "a"(ptr poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty( +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: [[DEF:%.*]] = call ptr asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %def = call ptr asm sideeffect "; def $0", "=a"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty( +; CHECK-SAME: ) #[[ATTR5]] { +; CHECK-NEXT: [[DEF:%.*]] = call <2 x ptr> asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %def = call <2 x ptr> asm sideeffect "; def $0", "=a"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0( +; CHECK-SAME: ) #[[ATTR6:[0-9]+]] { +; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %def = call {i32, i32} asm sideeffect "; def $0", "={a0},={a[4:5]}"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_clobber() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber( +; CHECK-SAME: ) #[[ATTR7:[0-9]+]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; clobber $0", "~{a4}"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple( +; CHECK-SAME: ) #[[ATTR8:[0-9]+]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; clobber $0", "~{a[10:13]}"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_clobber_oob() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob( +; CHECK-SAME: ) #[[ATTR9:[0-9]+]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; clobber $0", "~{a256}"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_clobber_max() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max( +; CHECK-SAME: ) #[[ATTR9]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; clobber $0", "~{a255}"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_physreg_oob() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob( +; CHECK-SAME: ) #[[ATTR9]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "{a256}"(i32 poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty( +; CHECK-SAME: ) #[[ATTR10:[0-9]+]] { +; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %def = call <32 x i32> asm sideeffect "; def $0", "=a"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty( +; CHECK-SAME: ) #[[ATTR10]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "a"(<32 x i32> poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty( +; CHECK-SAME: ) #[[ATTR10]] { +; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %def = call <32 x i32> asm sideeffect "; use $0", "=a,a"(<32 x i32> poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @vreg_use_exceeds_register_file() { +; CHECK-LABEL: define amdgpu_kernel void @vreg_use_exceeds_register_file( +; CHECK-SAME: ) #[[ATTR9]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "a"(<257 x i32> poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @vreg_def_exceeds_register_file() { +; CHECK-LABEL: define amdgpu_kernel void @vreg_def_exceeds_register_file( +; CHECK-SAME: ) #[[ATTR9]] { +; CHECK-NEXT: [[DEF:%.*]] = call <257 x i32> asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %def = call <257 x i32> asm sideeffect "; def $0", "=a"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @multiple() { +; CHECK-LABEL: define amdgpu_kernel void @multiple( +; CHECK-SAME: ) #[[ATTR10]] { +; CHECK-NEXT: [[DEF:%.*]] = call { <16 x i32>, <8 x i32>, <8 x i32> } asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %def = call {<16 x i32>, <8 x i32>, <8 x i32>} asm sideeffect "; def $0", "=a,=a,=a,a,a,a"(<4 x i32> splat (i32 0), <8 x i32> splat (i32 1), i64 999) + call void @use_most() + ret void +} + +define amdgpu_kernel void @earlyclobber_0() { +; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_0( +; CHECK-SAME: ) #[[ATTR11:[0-9]+]] { +; CHECK-NEXT: [[DEF:%.*]] = call <8 x i32> asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %def = call <8 x i32> asm sideeffect "; def $0", "=&a,a"(i32 0) + call void @use_most() + ret void +} + +define amdgpu_kernel void @earlyclobber_1() { +; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_1( +; CHECK-SAME: ) #[[ATTR12:[0-9]+]] { +; CHECK-NEXT: [[DEF:%.*]] = call { <8 x i32>, <16 x i32> } asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %def = call { <8 x i32>, <16 x i32 > } asm sideeffect "; def $0, $1", "=&a,=&a,a,a"(i32 0, <16 x i32> splat (i32 1)) + call void @use_most() + ret void +} + +define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512() { +; CHECK-LABEL: define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512( +; CHECK-SAME: ) #[[ATTR13:[0-9]+]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0, $1, $2", "{a16},a,a"(i32 poison, <8 x i32> poison, <16 x i32> poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512() { +; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512( +; CHECK-SAME: ) #[[ATTR13]] { +; CHECK-NEXT: [[TMP1:%.*]] = call { i32, <8 x i32>, <16 x i32> } asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call {i32, <8 x i32>, <16 x i32>} asm sideeffect "; def $0, $1, $2", "={a16},=a,=a"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() { +; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256( +; CHECK-SAME: ) #[[ATTR14:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call { i32, <16 x i32> } asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call {i32, <16 x i32>} asm sideeffect "; def $0, $1, $2", "={a16},=a,a"(<8 x i32> poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @mixed_physreg_vreg_tuples_0() { +; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_0( +; CHECK-SAME: ) #[[ATTR11]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0, $1", "{a[1:4]},a"(<4 x i32> poison, <4 x i32> poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @mixed_physreg_vreg_tuples_1() { +; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_1( +; CHECK-SAME: ) #[[ATTR15:[0-9]+]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0, $1", "a,{a[0:3]}"(<4 x i32> poison, <4 x i32> poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @physreg_raises_limit() { +; CHECK-LABEL: define amdgpu_kernel void @physreg_raises_limit( +; CHECK-SAME: ) #[[ATTR16:[0-9]+]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0, $1", "a,{a[5:8]}"(<4 x i32> poison, <4 x i32> poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @physreg_tuple_alignment_raises_limit() { +; CHECK-LABEL: define amdgpu_kernel void @physreg_tuple_alignment_raises_limit( +; CHECK-SAME: ) #[[ATTR11]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0, $1", "a,{a[1:4]}"(<4 x i32> poison, <4 x i32> poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @align3_virtreg() { +; CHECK-LABEL: define amdgpu_kernel void @align3_virtreg( +; CHECK-SAME: ) #[[ATTR6]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0, $1", "a,a"(<3 x i32> poison, <3 x i32> poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @align3_align4_virtreg() { +; CHECK-LABEL: define amdgpu_kernel void @align3_align4_virtreg( +; CHECK-SAME: ) #[[ATTR15]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0, $1", "a,a"(<3 x i32> poison, <4 x i32> poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @align2_align4_virtreg() { +; CHECK-LABEL: define amdgpu_kernel void @align2_align4_virtreg( +; CHECK-SAME: ) #[[ATTR15]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0, $1", "a,a"(<2 x i32> poison, <4 x i32> poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_write_register_a55() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55( +; CHECK-SAME: ) #[[ATTR17:[0-9]+]] { +; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META0:![0-9]+]], i32 0) +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !0, i32 0) + ret void +} + +define amdgpu_kernel void @kernel_uses_write_register_v55() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META1:![0-9]+]], i32 0) +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !1, i32 0) + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_write_register_a55_57() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57( +; CHECK-SAME: ) #[[ATTR18:[0-9]+]] { +; CHECK-NEXT: call void @llvm.write_register.i96(metadata [[META2:![0-9]+]], i96 0) +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !2, i96 0) + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR19:[0-9]+]] { +; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]]) +; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %reg = call i32 @llvm.read_register.i64(metadata !0) + store i32 %reg, ptr addrspace(1) %ptr + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR19]] { +; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata [[META0]]) +; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %reg = call i32 @llvm.read_volatile_register.i64(metadata !0) + store i32 %reg, ptr addrspace(1) %ptr + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR20:[0-9]+]] { +; CHECK-NEXT: [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata [[META3:![0-9]+]]) +; CHECK-NEXT: store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %reg = call i128 @llvm.read_register.i64(metadata !3) + store i128 %reg, ptr addrspace(1) %ptr + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256( +; CHECK-SAME: ) #[[ATTR9]] { +; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META4:![0-9]+]], i32 0) +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !4, i32 0) + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_multiple_uses() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_multiple_uses( +; CHECK-SAME: ) #[[ATTR5]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "a"(i64 poison) + call void asm sideeffect "; use $0", "a"(i32 poison) + call void asm sideeffect "; use $0", "a"(i128 poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_multiple_defs() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_multiple_defs( +; CHECK-SAME: ) #[[ATTR5]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i64 asm sideeffect " +; CHECK-NEXT: [[TMP2:%.*]] = call i32 asm sideeffect " +; CHECK-NEXT: [[TMP3:%.*]] = call i128 asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call i64 asm sideeffect "; def $0", "=a"() + call i32 asm sideeffect "; def $0", "=a"() + call i128 asm sideeffect "; def $0", "=a"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_multiple_use_defs() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_multiple_use_defs( +; CHECK-SAME: ) #[[ATTR5]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: [[TMP1:%.*]] = call i128 asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "a"(i32 poison) + call i128 asm sideeffect "; def $0", "=a"() + call void @use_most() + ret void +} + +define void @callgraph_b() { +; CHECK-LABEL: define void @callgraph_b( +; CHECK-SAME: ) #[[ATTR15]] { +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call <4 x i32> asm sideeffect "; def $0", "=a"() + call void asm sideeffect "; use $0", "a"(<8 x i32> poison) + call void @use_most() + ret void +} + +define void @callgraph_c() { +; CHECK-LABEL: define void @callgraph_c( +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 asm sideeffect " +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call i32 asm sideeffect "; def $0", "=a"() + call void asm sideeffect "; use $0", "a"(<2 x i32> poison) + call void @use_most() + ret void +} + +define void @callgraph_a(i1 %cond) { +; CHECK-LABEL: define void @callgraph_a( +; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR15]] { +; CHECK-NEXT: br i1 [[COND]], label [[A:%.*]], label [[B:%.*]] +; CHECK: a: +; CHECK-NEXT: call void @callgraph_b() +; CHECK-NEXT: ret void +; CHECK: b: +; CHECK-NEXT: call void @callgraph_c() +; CHECK-NEXT: ret void +; + br i1 %cond, label %a, label %b + +a: + call void @callgraph_b() + ret void + +b: + call void @callgraph_c() + ret void +} + + +define void @kernel_max_callgraph(i1 %cond) { +; CHECK-LABEL: define void @kernel_max_callgraph( +; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR15]] { +; CHECK-NEXT: call void @callgraph_a(i1 [[COND]]) +; CHECK-NEXT: ret void +; + call void @callgraph_a(i1 %cond) + ret void +} + +define amdgpu_kernel void @kernel_uses_all_virtregs() #1 { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_all_virtregs( +; CHECK-SAME: ) #[[ATTR21:[0-9]+]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "a,a,a,a,a,a,a,a"(<32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison) + call void @use_most() + ret void +} + +define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1() #1 { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1( +; CHECK-SAME: ) #[[ATTR21]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "a,a,a,a,a,a,a,a,a"(<32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, i32 poison) + call void @use_most() + ret void +} + +define void @recursive() { +; CHECK-LABEL: define void @recursive( +; CHECK-SAME: ) #[[ATTR22:[0-9]+]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: call void @recursive() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "a"(<7 x i32> poison) + call void @use_most() + call void @recursive() + ret void +} + +define void @indirect_0() { +; CHECK-LABEL: define void @indirect_0( +; CHECK-SAME: ) #[[ATTR22]] { +; CHECK-NEXT: call void asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void asm sideeffect "; use $0", "a"(<7 x i32> poison) + call void @use_most() + ret void +} + +define void @indirect_1() { +; CHECK-LABEL: define void @indirect_1( +; CHECK-SAME: ) #[[ATTR23:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i32> asm sideeffect " +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call <3 x i32> asm sideeffect "; def $0", "=a"() + call void @use_most() + ret void +} + +define amdgpu_kernel void @knowable_indirect_call(i1 %cond) { +; CHECK-LABEL: define amdgpu_kernel void @knowable_indirect_call( +; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR22]] { +; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @indirect_0, ptr @indirect_1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @indirect_1 +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] +; CHECK: 2: +; CHECK-NEXT: call void @indirect_1() +; CHECK-NEXT: br label [[TMP6:%.*]] +; CHECK: 3: +; CHECK-NEXT: br i1 true, label [[TMP4:%.*]], label [[TMP5:%.*]] +; CHECK: 4: +; CHECK-NEXT: call void @indirect_0() +; CHECK-NEXT: br label [[TMP6]] +; CHECK: 5: +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + %fptr = select i1 %cond, ptr @indirect_0, ptr @indirect_1 + call void %fptr() + call void @use_most() + ret void +} + +define amdgpu_kernel void @calls_poison(i1 %cond) { +; CHECK-LABEL: define amdgpu_kernel void @calls_poison( +; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: call void poison() +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void poison() + call void @use_most() + ret void +} + +define amdgpu_kernel void @calls_null(i1 %cond) { +; CHECK-LABEL: define amdgpu_kernel void @calls_null( +; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: call void null() +; CHECK-NEXT: call void @use_most() +; CHECK-NEXT: ret void +; + call void null() + call void @use_most() + ret void +} + +define amdgpu_kernel void @indirect_unknown(ptr %fptr) { +; CHECK-LABEL: define amdgpu_kernel void @indirect_unknown( +; CHECK-SAME: ptr [[FPTR:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: call void [[FPTR]]() +; CHECK-NEXT: ret void +; + call void %fptr() + ret void +} + +attributes #0 = { "amdgpu-agpr-alloc"="0" } +attributes #1 = { "amdgpu-waves-per-eu"="1,1" } + +!0 = !{!"a55"} +!1 = !{!"v55"} +!2 = !{!"a[55:57]"} +!3 = !{!"a[56:59]"} +!4 = !{!"a256"} + +;. +; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="1" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="2" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR3]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="4" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="6" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="5" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR8]] = { "amdgpu-agpr-alloc"="14" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR9]] = { "amdgpu-agpr-alloc"="256" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="32" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR11]] = { "amdgpu-agpr-alloc"="9" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR12]] = { "amdgpu-agpr-alloc"="64" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR13]] = { "amdgpu-agpr-alloc"="49" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR14]] = { "amdgpu-agpr-alloc"="33" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR15]] = { "amdgpu-agpr-alloc"="8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR16]] = { "amdgpu-agpr-alloc"="13" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="56" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="58" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="56" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR20]] = { "amdgpu-agpr-alloc"="60" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="256" "amdgpu-waves-per-eu"="1,1" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR22]] = { "amdgpu-agpr-alloc"="7" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR23]] = { "amdgpu-agpr-alloc"="3" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR24:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR25:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR26:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR27:[0-9]+]] = { nounwind "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR28:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR29]] = { "amdgpu-agpr-alloc"="0" } +;. +; CHECK: [[META0]] = !{!"a55"} +; CHECK: [[META1]] = !{!"v55"} +; CHECK: [[META2]] = !{!"a[55:57]"} +; CHECK: [[META3]] = !{!"a[56:59]"} +; CHECK: [[META4]] = !{!"a256"} +;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll deleted file mode 100644 index 664dfa2..0000000 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll +++ /dev/null @@ -1,264 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 4 -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s - -define amdgpu_kernel void @kernel_uses_asm_virtreg() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg( -; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: ret void -; - call void asm sideeffect "; use $0", "a"(i32 poison) - ret void -} - -define amdgpu_kernel void @kernel_uses_asm_virtreg_def() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def( -; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[DEF:%.*]] = call i32 asm sideeffect " -; CHECK-NEXT: ret void -; - %def = call i32 asm sideeffect "; def $0", "=a"() - ret void -} - -define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple( -; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: [[DEF:%.*]] = call i64 asm sideeffect " -; CHECK-NEXT: ret void -; - %def = call i64 asm sideeffect "; def $0", "={a[0:1]}"() - ret void -} - -define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg( -; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: ret void -; - call void asm sideeffect "; use $0", "v,a"(i32 poison, i32 poison) - ret void -} - -define amdgpu_kernel void @kernel_uses_non_agpr_asm() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_non_agpr_asm( -; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: ret void -; - call void asm sideeffect "; use $0", "v"(i32 poison) - ret void -} - -define amdgpu_kernel void @kernel_uses_asm_physreg() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg( -; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: ret void -; - call void asm sideeffect "; use $0", "{a0}"(i32 poison) - ret void -} - -define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_tuple( -; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: ret void -; - call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison) - ret void -} - -define void @func_uses_asm_virtreg_agpr() { -; CHECK-LABEL: define void @func_uses_asm_virtreg_agpr( -; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: ret void -; - call void asm sideeffect "; use $0", "a"(i32 poison) - ret void -} - -define void @func_uses_asm_physreg_agpr() { -; CHECK-LABEL: define void @func_uses_asm_physreg_agpr( -; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: ret void -; - call void asm sideeffect "; use $0", "{a0}"(i32 poison) - ret void -} - -define void @func_uses_asm_physreg_agpr_tuple() { -; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple( -; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: call void asm sideeffect " -; CHECK-NEXT: ret void -; - call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison) - ret void -} - -declare void @unknown() - -define amdgpu_kernel void @kernel_calls_extern() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern( -; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void -; - call void @unknown() - ret void -} - -define amdgpu_kernel void @kernel_calls_extern_marked_callsite() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite( -; CHECK-SAME: ) #[[ATTR2]] { -; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] -; CHECK-NEXT: ret void -; - call void @unknown() #0 - ret void -} - -define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) { -; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect( -; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: call void [[INDIRECT]]() -; CHECK-NEXT: ret void -; - call void %indirect() - ret void -} - -define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) { -; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite( -; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR6]] -; CHECK-NEXT: ret void -; - call void %indirect() #0 - ret void -} - -define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm( -; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: call void @func_uses_asm_physreg_agpr() -; CHECK-NEXT: ret void -; - call void @func_uses_asm_physreg_agpr() - ret void -} - -define void @empty() { -; CHECK-LABEL: define void @empty( -; CHECK-SAME: ) #[[ATTR1]] { -; CHECK-NEXT: ret void -; - ret void -} - -define void @also_empty() { -; CHECK-LABEL: define void @also_empty( -; CHECK-SAME: ) #[[ATTR1]] { -; CHECK-NEXT: ret void -; - ret void -} - -define amdgpu_kernel void @kernel_calls_empty() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_empty( -; CHECK-SAME: ) #[[ATTR1]] { -; CHECK-NEXT: call void @empty() -; CHECK-NEXT: ret void -; - call void @empty() - ret void -} - -define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() { -; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr( -; CHECK-SAME: ) #[[ATTR0]] { -; CHECK-NEXT: call void @empty() -; CHECK-NEXT: call void @func_uses_asm_physreg_agpr() -; CHECK-NEXT: ret void -; - call void @empty() - call void @func_uses_asm_physreg_agpr() - ret void -} - -define amdgpu_kernel void @kernel_calls_generic_intrinsic(ptr %ptr0, ptr %ptr1, i64 %size) { -; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_generic_intrinsic( -; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[PTR0]], ptr [[PTR1]], i64 [[SIZE]], i1 false) -; CHECK-NEXT: ret void -; - call void @llvm.memcpy.p0.p0.i64(ptr %ptr0, ptr %ptr1, i64 %size, i1 false) - ret void -} - -declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32 immarg, i32 immarg, i32 immarg) - -define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(ptr addrspace(1) %out, float %a, float %b, <32 x float> %c) { -; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32( -; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[RESULT:%.*]] = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float [[A]], float [[B]], <32 x float> [[C]], i32 0, i32 0, i32 0) -; CHECK-NEXT: store <32 x float> [[RESULT]], ptr addrspace(1) [[OUT]], align 128 -; CHECK-NEXT: ret void -; - %result = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0) - store <32 x float> %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) { -; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_workitem_id_x( -; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() -; CHECK-NEXT: store i32 [[RESULT]], ptr addrspace(1) [[OUT]], align 4 -; CHECK-NEXT: ret void -; - %result = call i32 @llvm.amdgcn.workitem.id.x() - store i32 %result, ptr addrspace(1) %out - ret void -} - -define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) { -; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr( -; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty -; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] -; CHECK: 2: -; CHECK-NEXT: call void @also_empty() -; CHECK-NEXT: br label [[TMP6:%.*]] -; CHECK: 3: -; CHECK-NEXT: br i1 true, label [[TMP4:%.*]], label [[TMP5:%.*]] -; CHECK: 4: -; CHECK-NEXT: call void @empty() -; CHECK-NEXT: br label [[TMP6]] -; CHECK: 5: -; CHECK-NEXT: unreachable -; CHECK: 6: -; CHECK-NEXT: ret void -; - %fptr = select i1 %cond, ptr @empty, ptr @also_empty - call void %fptr() - ret void -} - - -attributes #0 = { "amdgpu-agpr-alloc"="0" } -;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" } -;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll new file mode 100644 index 0000000..6c4f504 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll @@ -0,0 +1,452 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-enable-uniform-intrinsic-combine=0 -O3 -S < %s | FileCheck %s -check-prefix=CURRENT-CHECK +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O3 -S < %s | FileCheck %s -check-prefix=O3-CHECK + +define protected amdgpu_kernel void @trivial_waterfall_eq_zero(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CURRENT-CHECK-NEXT: [[ENTRY:.*:]] +; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true) +; CURRENT-CHECK-NEXT: [[IS_DONE_PEEL:%.*]] = icmp eq i32 [[TMP0]], 0 +; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_PEEL]], label %[[EXIT:.*]], label %[[IF_PEEL:.*]] +; CURRENT-CHECK: [[IF_PEEL]]: +; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: br label %[[EXIT]] +; CURRENT-CHECK: [[EXIT]]: +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { +; PASS-CHECK-NEXT: [[ENTRY:.*]]: +; PASS-CHECK-NEXT: br label %[[WHILE:.*]] +; PASS-CHECK: [[WHILE]]: +; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] +; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]] +; PASS-CHECK: [[IF]]: +; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: br label %[[WHILE]] +; PASS-CHECK: [[EXIT]]: +; PASS-CHECK-NEXT: ret void +; +; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero( +; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; O3-CHECK-NEXT: [[ENTRY:.*:]] +; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; O3-CHECK-NEXT: ret void +; +entry: + br label %while + +while: + %done = phi i1 [ 0, %entry ], [ 1, %if ] + %not_done = xor i1 %done, true + %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %not_done) + %is_done = icmp eq i64 %ballot, 0 ; in this case is_done = !not_done + br i1 %is_done, label %exit, label %if + +if: + store i32 5, ptr addrspace(1) %out + br label %while + +exit: + ret void +} + +define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CURRENT-CHECK-NEXT: [[ENTRY:.*:]] +; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true) +; CURRENT-CHECK-NEXT: [[IS_DONE_PEEL:%.*]] = icmp eq i32 [[TMP0]], 0 +; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_PEEL]], label %[[EXIT:.*]], label %[[IF_PEEL:.*]] +; CURRENT-CHECK: [[IF_PEEL]]: +; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: br label %[[EXIT]] +; CURRENT-CHECK: [[EXIT]]: +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[ENTRY:.*]]: +; PASS-CHECK-NEXT: br label %[[WHILE:.*]] +; PASS-CHECK: [[WHILE]]: +; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] +; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]] +; PASS-CHECK: [[IF]]: +; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: br label %[[WHILE]] +; PASS-CHECK: [[EXIT]]: +; PASS-CHECK-NEXT: ret void +; +; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op( +; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] { +; O3-CHECK-NEXT: [[ENTRY:.*:]] +; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; O3-CHECK-NEXT: ret void +; +entry: + br label %while + +while: + %done = phi i1 [ 0, %entry ], [ 1, %if ] + %not_done = xor i1 %done, true + %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %not_done) + %is_done = icmp eq i64 0, %ballot ; in this case is_done = !not_done + br i1 %is_done, label %exit, label %if + +if: + store i32 5, ptr addrspace(1) %out + br label %while + +exit: + ret void +} + +define protected amdgpu_kernel void @trivial_waterfall_ne_zero(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; CURRENT-CHECK-NEXT: [[ENTRY:.*:]] +; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: br label %[[WHILE:.*]] +; CURRENT-CHECK: [[WHILE]]: +; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true) +; CURRENT-CHECK-NEXT: [[IS_DONE_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_NOT]], label %[[WHILE]], label %[[EXIT:.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CURRENT-CHECK: [[EXIT]]: +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[ENTRY:.*]]: +; PASS-CHECK-NEXT: br label %[[WHILE:.*]] +; PASS-CHECK: [[WHILE]]: +; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] +; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]] +; PASS-CHECK: [[IF]]: +; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: br label %[[WHILE]] +; PASS-CHECK: [[EXIT]]: +; PASS-CHECK-NEXT: ret void +; +; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero( +; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] { +; O3-CHECK-NEXT: [[ENTRY:.*:]] +; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; O3-CHECK-NEXT: ret void +; +entry: + br label %while + +while: + %done = phi i1 [ 0, %entry ], [ 1, %if ] + %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %done) + %is_done = icmp ne i64 0, %ballot ; in this case is_done = done + br i1 %is_done, label %exit, label %if + +if: + store i32 5, ptr addrspace(1) %out + br label %while + +exit: + ret void +} + +define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CURRENT-CHECK-NEXT: [[ENTRY:.*:]] +; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: br label %[[WHILE:.*]] +; CURRENT-CHECK: [[WHILE]]: +; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true) +; CURRENT-CHECK-NEXT: [[IS_DONE_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_NOT]], label %[[WHILE]], label %[[EXIT:.*]], !llvm.loop [[LOOP2:![0-9]+]] +; CURRENT-CHECK: [[EXIT]]: +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[ENTRY:.*]]: +; PASS-CHECK-NEXT: br label %[[WHILE:.*]] +; PASS-CHECK: [[WHILE]]: +; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] +; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]] +; PASS-CHECK: [[IF]]: +; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: br label %[[WHILE]] +; PASS-CHECK: [[EXIT]]: +; PASS-CHECK-NEXT: ret void +; +; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap( +; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] { +; O3-CHECK-NEXT: [[ENTRY:.*:]] +; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; O3-CHECK-NEXT: ret void +; +entry: + br label %while + +while: + %done = phi i1 [ 0, %entry ], [ 1, %if ] + %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %done) + %is_done = icmp ne i64 %ballot, 0 ; in this case is_done = done + br i1 %is_done, label %exit, label %if + +if: + store i32 5, ptr addrspace(1) %out + br label %while + +exit: + ret void +} + +define protected amdgpu_kernel void @trivial_uniform_waterfall(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_uniform_waterfall( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CURRENT-CHECK-NEXT: [[ENTRY:.*:]] +; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true) +; CURRENT-CHECK-NEXT: [[IS_DONE_PEEL:%.*]] = icmp eq i32 [[TMP0]], 0 +; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_PEEL]], label %[[EXIT:.*]], label %[[WORK_PEEL:.*]] +; CURRENT-CHECK: [[WORK_PEEL]]: +; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: br label %[[EXIT]] +; CURRENT-CHECK: [[EXIT]]: +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_uniform_waterfall( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[ENTRY:.*]]: +; PASS-CHECK-NEXT: br label %[[WHILE:.*]] +; PASS-CHECK: [[WHILE]]: +; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[NEW_DONE:%.*]], %[[TAIL:.*]] ] +; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF:.*]] +; PASS-CHECK: [[IF]]: +; PASS-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID:%.*]] = icmp eq i32 0, 0 +; PASS-CHECK-NEXT: br i1 [[IS_FIRST_ACTIVE_ID]], label %[[WORK:.*]], label %[[TAIL]] +; PASS-CHECK: [[WORK]]: +; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: br label %[[TAIL]] +; PASS-CHECK: [[TAIL]]: +; PASS-CHECK-NEXT: [[NEW_DONE]] = phi i1 [ true, %[[WORK]] ], [ false, %[[IF]] ] +; PASS-CHECK-NEXT: br label %[[WHILE]] +; PASS-CHECK: [[EXIT]]: +; PASS-CHECK-NEXT: ret void +; +; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_uniform_waterfall( +; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] { +; O3-CHECK-NEXT: [[ENTRY:.*:]] +; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; O3-CHECK-NEXT: ret void +; +entry: + br label %while + +while: + %done = phi i1 [ false, %entry ], [ %new_done, %tail ] + %not_done = xor i1 %done, true + %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %not_done) + %is_done = icmp eq i64 %ballot, 0 + br i1 %is_done, label %exit, label %if + +if: + %first_active_id = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 0) + %is_first_active_id = icmp eq i32 0, %first_active_id + br i1 %is_first_active_id, label %work, label %tail + +work: + store i32 5, ptr addrspace(1) %out + br label %tail + +tail: + %new_done = phi i1 [ true, %work ], [ false, %if ] + br label %while + +exit: + ret void +} + +define protected amdgpu_kernel void @uniform_waterfall(ptr addrspace(1) %out, i32 %mymask) { +; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @uniform_waterfall( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]], i32 [[MYMASK:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CURRENT-CHECK-NEXT: [[ENTRY:.*:]] +; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true) +; CURRENT-CHECK-NEXT: [[IS_DONE_PEEL:%.*]] = icmp eq i32 [[TMP0]], 0 +; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_PEEL]], label %[[EXIT:.*]], label %[[WORK_PEEL:.*]] +; CURRENT-CHECK: [[WORK_PEEL]]: +; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: br label %[[EXIT]] +; CURRENT-CHECK: [[EXIT]]: +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define protected amdgpu_kernel void @uniform_waterfall( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[MYMASK:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[ENTRY:.*]]: +; PASS-CHECK-NEXT: br label %[[WHILE:.*]] +; PASS-CHECK: [[WHILE]]: +; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[NEW_DONE:%.*]], %[[TAIL:.*]] ] +; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF:.*]] +; PASS-CHECK: [[IF]]: +; PASS-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID:%.*]] = icmp eq i32 [[MYMASK]], [[MYMASK]] +; PASS-CHECK-NEXT: br i1 [[IS_FIRST_ACTIVE_ID]], label %[[WORK:.*]], label %[[TAIL]] +; PASS-CHECK: [[WORK]]: +; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: br label %[[TAIL]] +; PASS-CHECK: [[TAIL]]: +; PASS-CHECK-NEXT: [[NEW_DONE]] = phi i1 [ true, %[[WORK]] ], [ false, %[[IF]] ] +; PASS-CHECK-NEXT: br label %[[WHILE]] +; PASS-CHECK: [[EXIT]]: +; PASS-CHECK-NEXT: ret void +; +; O3-CHECK-LABEL: define protected amdgpu_kernel void @uniform_waterfall( +; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]], i32 [[MYMASK:%.*]]) local_unnamed_addr #[[ATTR0]] { +; O3-CHECK-NEXT: [[ENTRY:.*:]] +; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; O3-CHECK-NEXT: ret void +; +entry: + br label %while + +while: + %done = phi i1 [ false, %entry ], [ %new_done, %tail ] + %not_done = xor i1 %done, true + %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %not_done) + %is_done = icmp eq i64 %ballot, 0 + br i1 %is_done, label %exit, label %if + +if: + %first_active_id = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 %mymask) + %is_first_active_id = icmp eq i32 %mymask, %first_active_id + br i1 %is_first_active_id, label %work, label %tail + +work: + store i32 5, ptr addrspace(1) %out + br label %tail + +tail: + %new_done = phi i1 [ true, %work ], [ false, %if ] + br label %while + +exit: + ret void +} + +define protected amdgpu_kernel void @trivial_waterfall_eq_zero_i32(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_i32( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CURRENT-CHECK-NEXT: [[ENTRY:.*:]] +; CURRENT-CHECK-NEXT: [[BALLOT_PEEL:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true) +; CURRENT-CHECK-NEXT: [[IS_DONE_PEEL:%.*]] = icmp eq i32 [[BALLOT_PEEL]], 0 +; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_PEEL]], label %[[EXIT:.*]], label %[[IF_PEEL:.*]] +; CURRENT-CHECK: [[IF_PEEL]]: +; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: br label %[[EXIT]] +; CURRENT-CHECK: [[EXIT]]: +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_i32( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[ENTRY:.*]]: +; PASS-CHECK-NEXT: br label %[[WHILE:.*]] +; PASS-CHECK: [[WHILE]]: +; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] +; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]] +; PASS-CHECK: [[IF]]: +; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: br label %[[WHILE]] +; PASS-CHECK: [[EXIT]]: +; PASS-CHECK-NEXT: ret void +; +; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_i32( +; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] { +; O3-CHECK-NEXT: [[ENTRY:.*:]] +; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; O3-CHECK-NEXT: ret void +; +entry: + br label %while + +while: + %done = phi i1 [ 0, %entry ], [ 1, %if ] + %not_done = xor i1 %done, true + %ballot = tail call i32 @llvm.amdgcn.ballot.i32(i1 %not_done) + %is_done = icmp eq i32 %ballot, 0 ; in this case is_done = !not_done + br i1 %is_done, label %exit, label %if + +if: + store i32 5, ptr addrspace(1) %out + br label %while + +exit: + ret void +} + +define protected amdgpu_kernel void @trivial_waterfall_ne_zero_i32(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_i32( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CURRENT-CHECK-NEXT: [[ENTRY:.*:]] +; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: br label %[[WHILE:.*]] +; CURRENT-CHECK: [[WHILE]]: +; CURRENT-CHECK-NEXT: [[BALLOT:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true) +; CURRENT-CHECK-NEXT: [[IS_DONE_NOT:%.*]] = icmp eq i32 [[BALLOT]], 0 +; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_NOT]], label %[[WHILE]], label %[[EXIT:.*]], !llvm.loop [[LOOP3:![0-9]+]] +; CURRENT-CHECK: [[EXIT]]: +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_i32( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[ENTRY:.*]]: +; PASS-CHECK-NEXT: br label %[[WHILE:.*]] +; PASS-CHECK: [[WHILE]]: +; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] +; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]] +; PASS-CHECK: [[IF]]: +; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: br label %[[WHILE]] +; PASS-CHECK: [[EXIT]]: +; PASS-CHECK-NEXT: ret void +; +; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_i32( +; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] { +; O3-CHECK-NEXT: [[ENTRY:.*:]] +; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; O3-CHECK-NEXT: ret void +; +entry: + br label %while + +while: + %done = phi i1 [ 0, %entry ], [ 1, %if ] + %ballot = tail call i32 @llvm.amdgcn.ballot.i32(i1 %done) + %is_done = icmp ne i32 0, %ballot ; in this case is_done = done + br i1 %is_done, label %exit, label %if + +if: + store i32 5, ptr addrspace(1) %out + br label %while + +exit: + ret void +} + +declare i64 @llvm.amdgcn.ballot.i64(i1) #1 +!6 = !{i64 690} +!7 = distinct !{!7, !8} +!8 = !{!"llvm.loop.mustprogress"} +;. +; CURRENT-CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]} +; CURRENT-CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1} +; CURRENT-CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]} +; CURRENT-CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll new file mode 100644 index 0000000..aa11574 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-combine.ll @@ -0,0 +1,790 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-enable-uniform-intrinsic-combine=0 -O3 -S < %s | FileCheck %s -check-prefix=CURRENT-CHECK +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine,dce -S < %s | FileCheck %s -check-prefix=DCE-CHECK + +define amdgpu_kernel void @permlane64_constant(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @permlane64_constant( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CURRENT-CHECK-NEXT: store i32 77, ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @permlane64_constant( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { +; PASS-CHECK-NEXT: store i32 77, ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @permlane64_constant( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { +; DCE-CHECK-NEXT: store i32 77, ptr addrspace(1) [[OUT]], align 4 +; DCE-CHECK-NEXT: ret void +; + %v = call i32 @llvm.amdgcn.permlane64(i32 77) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @permlane64_uniform( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]], i32 [[SRC:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CURRENT-CHECK-NEXT: store i32 [[SRC]], ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @permlane64_uniform( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[SRC:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: store i32 [[SRC]], ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @permlane64_uniform( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[SRC:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: store i32 [[SRC]], ptr addrspace(1) [[OUT]], align 4 +; DCE-CHECK-NEXT: ret void +; + %v = call i32 @llvm.amdgcn.permlane64(i32 %src) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @permlane64_nonuniform(i32 addrspace(1)* %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @permlane64_nonuniform( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; CURRENT-CHECK-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CURRENT-CHECK-NEXT: [[V:%.*]] = tail call i32 @llvm.amdgcn.permlane64.i32(i32 [[TID]]) +; CURRENT-CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TID]] to i64 +; CURRENT-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i64 [[TMP1]] +; CURRENT-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @permlane64_nonuniform( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; PASS-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.permlane64.i32(i32 [[TID]]) +; PASS-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[TID]] +; PASS-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @permlane64_nonuniform( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; DCE-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.permlane64.i32(i32 [[TID]]) +; DCE-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[TID]] +; DCE-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; DCE-CHECK-NEXT: ret void +; + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %v = call i32 @llvm.amdgcn.permlane64(i32 %tid) + %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + store i32 %v, i32 addrspace(1)* %out_ptr + ret void +} + +define amdgpu_kernel void @permlane64_nonuniform_expression(i32 addrspace(1)* %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @permlane64_nonuniform_expression( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CURRENT-CHECK-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CURRENT-CHECK-NEXT: [[TID2:%.*]] = add nuw nsw i32 [[TID]], 1 +; CURRENT-CHECK-NEXT: [[V:%.*]] = tail call i32 @llvm.amdgcn.permlane64.i32(i32 [[TID2]]) +; CURRENT-CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TID]] to i64 +; CURRENT-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i64 [[TMP1]] +; CURRENT-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @permlane64_nonuniform_expression( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; PASS-CHECK-NEXT: [[TID2:%.*]] = add i32 [[TID]], 1 +; PASS-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.permlane64.i32(i32 [[TID2]]) +; PASS-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[TID]] +; PASS-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @permlane64_nonuniform_expression( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; DCE-CHECK-NEXT: [[TID2:%.*]] = add i32 [[TID]], 1 +; DCE-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.permlane64.i32(i32 [[TID2]]) +; DCE-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[TID]] +; DCE-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; DCE-CHECK-NEXT: ret void +; + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid2 = add i32 %tid, 1 + %v = call i32 @llvm.amdgcn.permlane64(i32 %tid2) + %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + store i32 %v, i32 addrspace(1)* %out_ptr + ret void +} + +define amdgpu_kernel void @readlane_constant(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readlane_constant( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CURRENT-CHECK-NEXT: store i32 7, ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readlane_constant( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: store i32 7, ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readlane_constant( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: store i32 7, ptr addrspace(1) [[OUT]], align 4 +; DCE-CHECK-NEXT: ret void +; + %v = call i32 @llvm.amdgcn.readlane(i32 7, i32 5) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @readlane_nonuniform_indices(ptr addrspace(1) %out, i32 %src0, i32 %src1) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readlane_nonuniform_indices( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]], i32 [[SRC0:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CURRENT-CHECK-NEXT: store i32 [[SRC0]], ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readlane_nonuniform_indices( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: store i32 [[SRC0]], ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readlane_nonuniform_indices( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: store i32 [[SRC0]], ptr addrspace(1) [[OUT]], align 4 +; DCE-CHECK-NEXT: ret void +; + %v = call i32 @llvm.amdgcn.readlane(i32 %src0, i32 %src1) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @readlane_nonuniform_workitem(i32 addrspace(1)* %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readlane_nonuniform_workitem( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +; CURRENT-CHECK-NEXT: [[TIDX:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CURRENT-CHECK-NEXT: [[TIDY:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() +; CURRENT-CHECK-NEXT: [[V:%.*]] = tail call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX]], i32 [[TIDY]]) +; CURRENT-CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TIDX]] to i64 +; CURRENT-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i64 [[TMP1]] +; CURRENT-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readlane_nonuniform_workitem( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[TIDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; PASS-CHECK-NEXT: [[TIDY:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; PASS-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX]], i32 [[TIDY]]) +; PASS-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[TIDX]] +; PASS-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readlane_nonuniform_workitem( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[TIDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; DCE-CHECK-NEXT: [[TIDY:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; DCE-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX]], i32 [[TIDY]]) +; DCE-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[TIDX]] +; DCE-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; DCE-CHECK-NEXT: ret void +; + %tidx = call i32 @llvm.amdgcn.workitem.id.x() + %tidy = call i32 @llvm.amdgcn.workitem.id.y() + %v = call i32 @llvm.amdgcn.readlane(i32 %tidx, i32 %tidy) + %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tidx + store i32 %v, i32 addrspace(1)* %out_ptr + ret void +} + +define amdgpu_kernel void @readlane_nonuniform_expression(i32 addrspace(1)* %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readlane_nonuniform_expression( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CURRENT-CHECK-NEXT: [[TIDX:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CURRENT-CHECK-NEXT: [[TIDY:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() +; CURRENT-CHECK-NEXT: [[TIDX2:%.*]] = add nuw nsw i32 [[TIDX]], 1 +; CURRENT-CHECK-NEXT: [[TIDY2:%.*]] = add nuw nsw i32 [[TIDY]], 2 +; CURRENT-CHECK-NEXT: [[V:%.*]] = tail call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX2]], i32 [[TIDY2]]) +; CURRENT-CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TIDX]] to i64 +; CURRENT-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i64 [[TMP1]] +; CURRENT-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readlane_nonuniform_expression( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[TIDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; PASS-CHECK-NEXT: [[TIDY:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; PASS-CHECK-NEXT: [[TIDX2:%.*]] = add i32 [[TIDX]], 1 +; PASS-CHECK-NEXT: [[TIDY2:%.*]] = add i32 [[TIDY]], 2 +; PASS-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX2]], i32 [[TIDY2]]) +; PASS-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[TIDX]] +; PASS-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readlane_nonuniform_expression( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[TIDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; DCE-CHECK-NEXT: [[TIDY:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; DCE-CHECK-NEXT: [[TIDX2:%.*]] = add i32 [[TIDX]], 1 +; DCE-CHECK-NEXT: [[TIDY2:%.*]] = add i32 [[TIDY]], 2 +; DCE-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX2]], i32 [[TIDY2]]) +; DCE-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[TIDX]] +; DCE-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; DCE-CHECK-NEXT: ret void +; + %tidx = call i32 @llvm.amdgcn.workitem.id.x() + %tidy = call i32 @llvm.amdgcn.workitem.id.y() + %tidx2 = add i32 %tidx, 1 + %tidy2 = add i32 %tidy, 2 + %v = call i32 @llvm.amdgcn.readlane(i32 %tidx2, i32 %tidy2) + %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tidx + store i32 %v, i32 addrspace(1)* %out_ptr + ret void +} + +define amdgpu_kernel void @readfirstlane_constant(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_constant( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CURRENT-CHECK-NEXT: store i32 7, ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_constant( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: store i32 7, ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_constant( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: store i32 7, ptr addrspace(1) [[OUT]], align 4 +; DCE-CHECK-NEXT: ret void +; + %v = call i32 @llvm.amdgcn.readfirstlane(i32 7) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @readfirstlane_with_argument(ptr addrspace(1) %out, i32 %src0) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_argument( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]], i32 [[SRC0:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CURRENT-CHECK-NEXT: store i32 [[SRC0]], ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_argument( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[SRC0:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: store i32 [[SRC0]], ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_argument( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[SRC0:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: store i32 [[SRC0]], ptr addrspace(1) [[OUT]], align 4 +; DCE-CHECK-NEXT: ret void +; + %v = call i32 @llvm.amdgcn.readfirstlane(i32 %src0) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @readfirstlane_with_workitem_id(i32 addrspace(1)* %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_workitem_id( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CURRENT-CHECK-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CURRENT-CHECK-NEXT: [[V:%.*]] = tail call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TID]]) +; CURRENT-CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TID]] to i64 +; CURRENT-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i64 [[TMP1]] +; CURRENT-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_workitem_id( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; PASS-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TID]]) +; PASS-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[TID]] +; PASS-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_workitem_id( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; DCE-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TID]]) +; DCE-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[TID]] +; DCE-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; DCE-CHECK-NEXT: ret void +; + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %v = call i32 @llvm.amdgcn.readfirstlane(i32 %tid) + %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + store i32 %v, i32 addrspace(1)* %out_ptr + ret void +} + +define amdgpu_kernel void @readfirstlane_expression(i32 addrspace(1)* %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_expression( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CURRENT-CHECK-NEXT: [[TID:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CURRENT-CHECK-NEXT: [[TID2:%.*]] = add nuw nsw i32 [[TID]], 1 +; CURRENT-CHECK-NEXT: [[V:%.*]] = tail call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TID2]]) +; CURRENT-CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[TID2]] to i64 +; CURRENT-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i64 [[TMP1]] +; CURRENT-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_expression( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; PASS-CHECK-NEXT: [[TID2:%.*]] = add i32 [[TID]], 1 +; PASS-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TID2]]) +; PASS-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[TID2]] +; PASS-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_expression( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; DCE-CHECK-NEXT: [[TID2:%.*]] = add i32 [[TID]], 1 +; DCE-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TID2]]) +; DCE-CHECK-NEXT: [[OUT_PTR:%.*]] = getelementptr i32, ptr addrspace(1) [[OUT]], i32 [[TID2]] +; DCE-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT_PTR]], align 4 +; DCE-CHECK-NEXT: ret void +; + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %tid2 = add i32 %tid, 1 + %v = call i32 @llvm.amdgcn.readfirstlane(i32 %tid2) + %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2 + store i32 %v, i32 addrspace(1)* %out_ptr + ret void +} + +define amdgpu_kernel void @readfirstlane_with_readfirstlane(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_readfirstlane( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_readfirstlane( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_readfirstlane( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 +; DCE-CHECK-NEXT: ret void +; + %v1 = call i32 @llvm.amdgcn.readfirstlane(i32 5) + %v2 = call i32 @llvm.amdgcn.readfirstlane(i32 %v1) + store i32 %v2, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @readfirstlane_with_readlane(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_readlane( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CURRENT-CHECK-NEXT: [[TIDX:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CURRENT-CHECK-NEXT: [[TIDY:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() +; CURRENT-CHECK-NEXT: [[V1:%.*]] = tail call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX]], i32 [[TIDY]]) +; CURRENT-CHECK-NEXT: store i32 [[V1]], ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_readlane( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[TIDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; PASS-CHECK-NEXT: [[TIDY:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; PASS-CHECK-NEXT: [[V1:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX]], i32 [[TIDY]]) +; PASS-CHECK-NEXT: store i32 [[V1]], ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_with_readlane( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[TIDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; DCE-CHECK-NEXT: [[TIDY:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; DCE-CHECK-NEXT: [[V1:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX]], i32 [[TIDY]]) +; DCE-CHECK-NEXT: store i32 [[V1]], ptr addrspace(1) [[OUT]], align 4 +; DCE-CHECK-NEXT: ret void +; + %tidx = call i32 @llvm.amdgcn.workitem.id.x() + %tidy = call i32 @llvm.amdgcn.workitem.id.y() + %v1 = call i32 @llvm.amdgcn.readlane(i32 %tidx, i32 %tidy) + %v2 = call i32 @llvm.amdgcn.readfirstlane(i32 %v1) + store i32 %v2, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @readlane_with_firstlane(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readlane_with_firstlane( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CURRENT-CHECK-NEXT: [[TIDX:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CURRENT-CHECK-NEXT: [[V1:%.*]] = tail call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TIDX]]) +; CURRENT-CHECK-NEXT: store i32 [[V1]], ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readlane_with_firstlane( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[TIDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; PASS-CHECK-NEXT: [[V1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TIDX]]) +; PASS-CHECK-NEXT: store i32 [[V1]], ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readlane_with_firstlane( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[TIDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; DCE-CHECK-NEXT: [[V1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[TIDX]]) +; DCE-CHECK-NEXT: store i32 [[V1]], ptr addrspace(1) [[OUT]], align 4 +; DCE-CHECK-NEXT: ret void +; + %tidx = call i32 @llvm.amdgcn.workitem.id.x() + %v1 = call i32 @llvm.amdgcn.readfirstlane(i32 %tidx) + %v2 = call i32 @llvm.amdgcn.readlane(i32 %v1, i32 3) + store i32 %v2, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @readlane_readlane(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readlane_readlane( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CURRENT-CHECK-NEXT: [[TIDX:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CURRENT-CHECK-NEXT: [[TIDY:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y() +; CURRENT-CHECK-NEXT: [[V1:%.*]] = tail call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX]], i32 [[TIDY]]) +; CURRENT-CHECK-NEXT: store i32 [[V1]], ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readlane_readlane( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[TIDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; PASS-CHECK-NEXT: [[TIDY:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; PASS-CHECK-NEXT: [[V1:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX]], i32 [[TIDY]]) +; PASS-CHECK-NEXT: store i32 [[V1]], ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readlane_readlane( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[TIDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; DCE-CHECK-NEXT: [[TIDY:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; DCE-CHECK-NEXT: [[V1:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX]], i32 [[TIDY]]) +; DCE-CHECK-NEXT: store i32 [[V1]], ptr addrspace(1) [[OUT]], align 4 +; DCE-CHECK-NEXT: ret void +; + %tidx = call i32 @llvm.amdgcn.workitem.id.x() + %tidy = call i32 @llvm.amdgcn.workitem.id.y() + %v1 = call i32 @llvm.amdgcn.readlane(i32 %tidx, i32 %tidy) + %v2 = call i32 @llvm.amdgcn.readlane(i32 %v1, i32 2) + store i32 %v2, ptr addrspace(1) %out + ret void +} + + +define amdgpu_kernel void @permlane64_boundary(ptr addrspace(1) %out_min, ptr addrspace(1) %out_max) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @permlane64_boundary( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT_MIN:%.*]], ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT_MAX:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CURRENT-CHECK-NEXT: store i32 -2147483648, ptr addrspace(1) [[OUT_MIN]], align 4 +; CURRENT-CHECK-NEXT: store i32 2147483647, ptr addrspace(1) [[OUT_MAX]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @permlane64_boundary( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT_MIN:%.*]], ptr addrspace(1) [[OUT_MAX:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: store i32 -2147483648, ptr addrspace(1) [[OUT_MIN]], align 4 +; PASS-CHECK-NEXT: store i32 2147483647, ptr addrspace(1) [[OUT_MAX]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @permlane64_boundary( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT_MIN:%.*]], ptr addrspace(1) [[OUT_MAX:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: store i32 -2147483648, ptr addrspace(1) [[OUT_MIN]], align 4 +; DCE-CHECK-NEXT: store i32 2147483647, ptr addrspace(1) [[OUT_MAX]], align 4 +; DCE-CHECK-NEXT: ret void +; + %min_v = call i32 @llvm.amdgcn.permlane64(i32 -2147483648) + store i32 %min_v, ptr addrspace(1) %out_min + %max_v = call i32 @llvm.amdgcn.permlane64(i32 2147483647) + store i32 %max_v, ptr addrspace(1) %out_max + ret void +} + +define amdgpu_kernel void @readlane_cross_lane(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readlane_cross_lane( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CURRENT-CHECK-NEXT: [[TIDX:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CURRENT-CHECK-NEXT: [[TIDY:%.*]] = add nuw nsw i32 [[TIDX]], 5 +; CURRENT-CHECK-NEXT: [[V:%.*]] = tail call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX]], i32 [[TIDY]]) +; CURRENT-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readlane_cross_lane( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[TIDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; PASS-CHECK-NEXT: [[TIDY:%.*]] = add i32 [[TIDX]], 5 +; PASS-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX]], i32 [[TIDY]]) +; PASS-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readlane_cross_lane( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[TIDX:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; DCE-CHECK-NEXT: [[TIDY:%.*]] = add i32 [[TIDX]], 5 +; DCE-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[TIDX]], i32 [[TIDY]]) +; DCE-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4 +; DCE-CHECK-NEXT: ret void +; + %tidx = call i32 @llvm.amdgcn.workitem.id.x() + %tidy = add i32 %tidx, 5 + %v = call i32 @llvm.amdgcn.readlane(i32 %tidx, i32 %tidy) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @readfirstlane_random(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_random( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CURRENT-CHECK-NEXT: store i32 435, ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_random( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[RANDOM:%.*]] = xor i32 123, 456 +; PASS-CHECK-NEXT: store i32 [[RANDOM]], ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readfirstlane_random( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[RANDOM:%.*]] = xor i32 123, 456 +; DCE-CHECK-NEXT: store i32 [[RANDOM]], ptr addrspace(1) [[OUT]], align 4 +; DCE-CHECK-NEXT: ret void +; + %random = xor i32 123, 456 + %v = call i32 @llvm.amdgcn.readfirstlane(i32 %random) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @readlane_expression(ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @readlane_expression( +; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CURRENT-CHECK-NEXT: [[IDX1:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() +; CURRENT-CHECK-NEXT: [[IDX2:%.*]] = shl nuw nsw i32 [[IDX1]], 1 +; CURRENT-CHECK-NEXT: [[V:%.*]] = tail call i32 @llvm.amdgcn.readlane.i32(i32 [[IDX1]], i32 [[IDX2]]) +; CURRENT-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @readlane_expression( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[IDX1:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; PASS-CHECK-NEXT: [[IDX2:%.*]] = mul i32 [[IDX1]], 2 +; PASS-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[IDX1]], i32 [[IDX2]]) +; PASS-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @readlane_expression( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[IDX1:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; DCE-CHECK-NEXT: [[IDX2:%.*]] = mul i32 [[IDX1]], 2 +; DCE-CHECK-NEXT: [[V:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[IDX1]], i32 [[IDX2]]) +; DCE-CHECK-NEXT: store i32 [[V]], ptr addrspace(1) [[OUT]], align 4 +; DCE-CHECK-NEXT: ret void +; + %idx1 = call i32 @llvm.amdgcn.workitem.id.x() + %idx2 = mul i32 %idx1, 2 + %v = call i32 @llvm.amdgcn.readlane(i32 %idx1, i32 %idx2) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @ballot_i32(i32 %v, ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @ballot_i32( +; CURRENT-CHECK-SAME: i32 [[V:%.*]], ptr addrspace(1) writeonly captures(none) initializes((0, 1)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CURRENT-CHECK-NEXT: [[C:%.*]] = trunc i32 [[V]] to i1 +; CURRENT-CHECK-NEXT: [[BALLOT:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 [[C]]) +; CURRENT-CHECK-NEXT: [[BALLOT_NE_ZERO:%.*]] = icmp ne i32 [[BALLOT]], 0 +; CURRENT-CHECK-NEXT: store i1 [[BALLOT_NE_ZERO]], ptr addrspace(1) [[OUT]], align 1 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @ballot_i32( +; PASS-CHECK-SAME: i32 [[V:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[C:%.*]] = trunc i32 [[V]] to i1 +; PASS-CHECK-NEXT: store i1 [[C]], ptr addrspace(1) [[OUT]], align 1 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @ballot_i32( +; DCE-CHECK-SAME: i32 [[V:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[C:%.*]] = trunc i32 [[V]] to i1 +; DCE-CHECK-NEXT: store i1 [[C]], ptr addrspace(1) [[OUT]], align 1 +; DCE-CHECK-NEXT: ret void +; + %c = trunc i32 %v to i1 + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) + %ballot_ne_zero = icmp ne i32 %ballot, 0 + store i1 %ballot_ne_zero, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @ballot_i64(i32 %v, ptr addrspace(1) %out) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @ballot_i64( +; CURRENT-CHECK-SAME: i32 [[V:%.*]], ptr addrspace(1) writeonly captures(none) initializes((0, 1)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] { +; CURRENT-CHECK-NEXT: [[C:%.*]] = trunc i32 [[V]] to i1 +; CURRENT-CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 [[C]]) +; CURRENT-CHECK-NEXT: [[BALLOT_NE_ZERO:%.*]] = icmp ne i32 [[TMP1]], 0 +; CURRENT-CHECK-NEXT: store i1 [[BALLOT_NE_ZERO]], ptr addrspace(1) [[OUT]], align 1 +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @ballot_i64( +; PASS-CHECK-SAME: i32 [[V:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[C:%.*]] = trunc i32 [[V]] to i1 +; PASS-CHECK-NEXT: store i1 [[C]], ptr addrspace(1) [[OUT]], align 1 +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @ballot_i64( +; DCE-CHECK-SAME: i32 [[V:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[C:%.*]] = trunc i32 [[V]] to i1 +; DCE-CHECK-NEXT: store i1 [[C]], ptr addrspace(1) [[OUT]], align 1 +; DCE-CHECK-NEXT: ret void +; + %c = trunc i32 %v to i1 + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) + %ballot_ne_zero = icmp ne i64 %ballot, 0 + store i1 %ballot_ne_zero, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_readlane_i16(i16 %src0, i32 %src1) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @test_readlane_i16( +; CURRENT-CHECK-SAME: i16 [[SRC0:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { +; CURRENT-CHECK-NEXT: tail call void asm sideeffect " +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @test_readlane_i16( +; PASS-CHECK-SAME: i16 [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: call void asm sideeffect " +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @test_readlane_i16( +; DCE-CHECK-SAME: i16 [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: call void asm sideeffect " +; DCE-CHECK-NEXT: ret void +; + %readlane = call i16 @llvm.amdgcn.readlane.i16(i16 %src0, i32 %src1) + call void asm sideeffect "; use $0", "s"(i16 %readlane) + ret void +} + +define amdgpu_kernel void @test_readlane_i64(i64 %src0, i32 %src1) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @test_readlane_i64( +; CURRENT-CHECK-SAME: i64 [[SRC0:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CURRENT-CHECK-NEXT: tail call void asm sideeffect " +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @test_readlane_i64( +; PASS-CHECK-SAME: i64 [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: call void asm sideeffect " +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @test_readlane_i64( +; DCE-CHECK-SAME: i64 [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: call void asm sideeffect " +; DCE-CHECK-NEXT: ret void +; + %readlane = call i64 @llvm.amdgcn.readlane.i64(i64 %src0, i32 %src1) + call void asm sideeffect "; use $0", "s"(i64 %readlane) + ret void +} + +define amdgpu_kernel void @test_readlane_bf16(bfloat %src0, i32 %src1) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @test_readlane_bf16( +; CURRENT-CHECK-SAME: bfloat [[SRC0:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CURRENT-CHECK-NEXT: tail call void asm sideeffect " +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @test_readlane_bf16( +; PASS-CHECK-SAME: bfloat [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: call void asm sideeffect " +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @test_readlane_bf16( +; DCE-CHECK-SAME: bfloat [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: call void asm sideeffect " +; DCE-CHECK-NEXT: ret void +; + %readlane = call bfloat @llvm.amdgcn.readlane.bf16(bfloat %src0, i32 %src1) + call void asm sideeffect "; use $0", "s"(bfloat %readlane) + ret void +} + +define amdgpu_kernel void @test_readlane_f16(half %src0, i32 %src1) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f16( +; CURRENT-CHECK-SAME: half [[SRC0:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CURRENT-CHECK-NEXT: tail call void asm sideeffect " +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f16( +; PASS-CHECK-SAME: half [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: call void asm sideeffect " +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f16( +; DCE-CHECK-SAME: half [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: call void asm sideeffect " +; DCE-CHECK-NEXT: ret void +; + %readlane = call half @llvm.amdgcn.readlane.f16(half %src0, i32 %src1) + call void asm sideeffect "; use $0", "s"(half %readlane) + ret void +} + +define amdgpu_kernel void @test_readlane_f32(float %src0, i32 %src1) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f32( +; CURRENT-CHECK-SAME: float [[SRC0:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CURRENT-CHECK-NEXT: tail call void asm sideeffect " +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f32( +; PASS-CHECK-SAME: float [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: call void asm sideeffect " +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f32( +; DCE-CHECK-SAME: float [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: call void asm sideeffect " +; DCE-CHECK-NEXT: ret void +; + %readlane = call float @llvm.amdgcn.readlane.f32(float %src0, i32 %src1) + call void asm sideeffect "; use $0", "s"(float %readlane) + ret void +} + +define amdgpu_kernel void @test_readlane_f64(double %src0, i32 %src1) { +; CURRENT-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f64( +; CURRENT-CHECK-SAME: double [[SRC0:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CURRENT-CHECK-NEXT: tail call void asm sideeffect " +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f64( +; PASS-CHECK-SAME: double [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: call void asm sideeffect " +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define amdgpu_kernel void @test_readlane_f64( +; DCE-CHECK-SAME: double [[SRC0:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: call void asm sideeffect " +; DCE-CHECK-NEXT: ret void +; + %readlane = call double @llvm.amdgcn.readlane.f64(double %src0, i32 %src1) + call void asm sideeffect "; use $0", "s"(double %readlane) + ret void +} +; All such cases can be optimised, given generic way to query getDeclarationIfExists() +define void @test_readlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src, i32 %src1) { +; CURRENT-CHECK-LABEL: define void @test_readlane_v8i16( +; CURRENT-CHECK-SAME: ptr addrspace(1) readnone captures(none) [[OUT:%.*]], <8 x i16> [[SRC:%.*]], i32 [[SRC1:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CURRENT-CHECK-NEXT: [[X:%.*]] = tail call <8 x i16> @llvm.amdgcn.readlane.v8i16(<8 x i16> [[SRC]], i32 [[SRC1]]) +; CURRENT-CHECK-NEXT: tail call void asm sideeffect " +; CURRENT-CHECK-NEXT: ret void +; +; PASS-CHECK-LABEL: define void @test_readlane_v8i16( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <8 x i16> [[SRC:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; PASS-CHECK-NEXT: [[X:%.*]] = call <8 x i16> @llvm.amdgcn.readlane.v8i16(<8 x i16> [[SRC]], i32 [[SRC1]]) +; PASS-CHECK-NEXT: call void asm sideeffect " +; PASS-CHECK-NEXT: ret void +; +; DCE-CHECK-LABEL: define void @test_readlane_v8i16( +; DCE-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <8 x i16> [[SRC:%.*]], i32 [[SRC1:%.*]]) #[[ATTR0]] { +; DCE-CHECK-NEXT: [[X:%.*]] = call <8 x i16> @llvm.amdgcn.readlane.v8i16(<8 x i16> [[SRC]], i32 [[SRC1]]) +; DCE-CHECK-NEXT: call void asm sideeffect " +; DCE-CHECK-NEXT: ret void +; + %x = call <8 x i16> @llvm.amdgcn.readlane.v8i16(<8 x i16> %src, i32 %src1) + call void asm sideeffect "; use $0", "s"(<8 x i16> %x) + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-temporal-divergence.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-temporal-divergence.ll new file mode 100644 index 0000000..2fde3e3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-temporal-divergence.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine,instcombine,early-cse,simplifycfg -S < %s | FileCheck %s -check-prefix=COMB-CHECK + +; This should not be optimized +define amdgpu_cs void @temporal_divergence(ptr addrspace(1) %out, i32 %n) { +; PASS-CHECK-LABEL: define amdgpu_cs void @temporal_divergence( +; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; PASS-CHECK-NEXT: [[ENTRY:.*]]: +; PASS-CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; PASS-CHECK-NEXT: br label %[[H:.*]] +; PASS-CHECK: [[H]]: +; PASS-CHECK-NEXT: [[UNI_MERGE_H:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[UNI_INC:%.*]], %[[H]] ] +; PASS-CHECK-NEXT: [[UNI_INC]] = add i32 [[UNI_MERGE_H]], 1 +; PASS-CHECK-NEXT: [[DIV_EXITX:%.*]] = icmp eq i32 [[TID]], 0 +; PASS-CHECK-NEXT: br i1 [[DIV_EXITX]], label %[[X:.*]], label %[[H]] +; PASS-CHECK: [[X]]: +; PASS-CHECK-NEXT: [[UNI_JOIN:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[UNI_INC]]) +; PASS-CHECK-NEXT: [[JOIN_USER:%.*]] = add i32 [[UNI_JOIN]], 5 +; PASS-CHECK-NEXT: store i32 [[JOIN_USER]], ptr addrspace(1) [[OUT]], align 4 +; PASS-CHECK-NEXT: ret void +; +; COMB-CHECK-LABEL: define amdgpu_cs void @temporal_divergence( +; COMB-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; COMB-CHECK-NEXT: [[ENTRY:.*]]: +; COMB-CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; COMB-CHECK-NEXT: br label %[[H:.*]] +; COMB-CHECK: [[H]]: +; COMB-CHECK-NEXT: [[UNI_MERGE_H:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[UNI_INC:%.*]], %[[H]] ] +; COMB-CHECK-NEXT: [[UNI_INC]] = add i32 [[UNI_MERGE_H]], 1 +; COMB-CHECK-NEXT: [[DIV_EXITX:%.*]] = icmp eq i32 [[TID]], 0 +; COMB-CHECK-NEXT: br i1 [[DIV_EXITX]], label %[[X:.*]], label %[[H]] +; COMB-CHECK: [[X]]: +; COMB-CHECK-NEXT: [[UNI_JOIN:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[UNI_INC]]) +; COMB-CHECK-NEXT: [[JOIN_USER:%.*]] = add i32 [[UNI_JOIN]], 5 +; COMB-CHECK-NEXT: store i32 [[JOIN_USER]], ptr addrspace(1) [[OUT]], align 4 +; COMB-CHECK-NEXT: ret void +; +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + br label %H + +H: + %uni.merge.h = phi i32 [ 0, %entry ], [ %uni.inc, %H ] + %uni.inc = add i32 %uni.merge.h, 1 + %div.exitx = icmp eq i32 %tid, 0 + br i1 %div.exitx, label %X, label %H ; divergent branch + +X: + %uni.join = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %uni.inc) + %join.user = add i32 %uni.join, 5 + store i32 %join.user, ptr addrspace(1) %out + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() +declare i32 @llvm.amdgcn.readfirstlane.i32(i32) diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll index 2ae6fc2..4a6fa4f 100644 --- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll +++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll @@ -691,7 +691,8 @@ define amdgpu_kernel void @uaddo32_vcc_user(ptr addrspace(1) %out, ptr addrspace ; GCN-ISEL-LABEL: name: suaddo64 ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.0 -; GCN-ISEL: S_ADD_U64_PSEUDO +; GCN-ISEL: S_UADDO_PSEUDO +; GCN-ISEL: S_ADD_CO_PSEUDO define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) #0 { ; CISI-LABEL: suaddo64: @@ -700,21 +701,23 @@ define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; CISI-NEXT: s_mov_b32 s11, 0xf000 ; CISI-NEXT: s_mov_b32 s10, -1 ; CISI-NEXT: s_waitcnt lgkmcnt(0) -; CISI-NEXT: s_add_u32 s6, s4, s6 -; CISI-NEXT: v_mov_b32_e32 v0, s4 -; CISI-NEXT: s_addc_u32 s7, s5, s7 -; CISI-NEXT: v_mov_b32_e32 v1, s5 -; CISI-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1] -; CISI-NEXT: v_mov_b32_e32 v2, s6 +; CISI-NEXT: s_add_u32 s4, s4, s6 +; CISI-NEXT: s_cselect_b64 s[12:13], -1, 0 +; CISI-NEXT: s_or_b32 s6, s12, s13 +; CISI-NEXT: s_cmp_lg_u32 s6, 0 +; CISI-NEXT: s_addc_u32 s5, s5, s7 ; CISI-NEXT: s_mov_b32 s8, s0 ; CISI-NEXT: s_mov_b32 s9, s1 +; CISI-NEXT: v_mov_b32_e32 v0, s4 +; CISI-NEXT: v_mov_b32_e32 v1, s5 +; CISI-NEXT: s_cselect_b64 s[4:5], -1, 0 ; CISI-NEXT: s_mov_b32 s0, s2 ; CISI-NEXT: s_mov_b32 s1, s3 ; CISI-NEXT: s_mov_b32 s2, s10 ; CISI-NEXT: s_mov_b32 s3, s11 -; CISI-NEXT: v_mov_b32_e32 v3, s7 -; CISI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CISI-NEXT: buffer_store_dwordx2 v[2:3], off, s[8:11], 0 +; CISI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; CISI-NEXT: s_waitcnt expcnt(0) +; CISI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; CISI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; CISI-NEXT: s_endpgm ; @@ -722,37 +725,37 @@ define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: s_add_u32 s2, s4, s6 ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: s_add_u32 s0, s4, s6 -; VI-NEXT: v_mov_b32_e32 v4, s4 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_addc_u32 s1, s5, s7 -; VI-NEXT: v_mov_b32_e32 v5, s5 -; VI-NEXT: v_mov_b32_e32 v7, s1 -; VI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[4:5] -; VI-NEXT: v_mov_b32_e32 v6, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; VI-NEXT: s_cmp_lg_u64 s[0:1], 0 +; VI-NEXT: s_addc_u32 s0, s5, s7 +; VI-NEXT: v_mov_b32_e32 v4, s2 +; VI-NEXT: v_mov_b32_e32 v5, s0 +; VI-NEXT: s_cselect_b64 s[0:1], -1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[6:7] -; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-NEXT: flat_store_dwordx2 v[0:1], v[4:5] +; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; VI-NEXT: flat_store_byte v[2:3], v0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: suaddo64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 -; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_add_u32 s0, s12, s14 -; GFX9-NEXT: v_mov_b32_e32 v0, s12 -; GFX9-NEXT: v_mov_b32_e32 v1, s13 -; GFX9-NEXT: s_addc_u32 s1, s13, s15 -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1] -; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[8:9] -; GFX9-NEXT: global_store_byte v4, v0, s[10:11] +; GFX9-NEXT: s_add_u32 s2, s12, s14 +; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 +; GFX9-NEXT: s_addc_u32 s0, s13, s15 +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1] +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX9-NEXT: global_store_byte v2, v3, s[10:11] ; GFX9-NEXT: s_endpgm ; ; GFX1010-LABEL: suaddo64: @@ -761,10 +764,12 @@ define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1010-NEXT: v_mov_b32_e32 v2, 0 ; GFX1010-NEXT: s_waitcnt lgkmcnt(0) ; GFX1010-NEXT: s_add_u32 s0, s12, s14 -; GFX1010-NEXT: s_addc_u32 s1, s13, s15 +; GFX1010-NEXT: s_cselect_b32 s1, -1, 0 ; GFX1010-NEXT: v_mov_b32_e32 v0, s0 +; GFX1010-NEXT: s_cmp_lg_u32 s1, 0 +; GFX1010-NEXT: s_addc_u32 s1, s13, s15 +; GFX1010-NEXT: s_cselect_b32 s0, -1, 0 ; GFX1010-NEXT: v_mov_b32_e32 v1, s1 -; GFX1010-NEXT: v_cmp_lt_u64_e64 s0, s[0:1], s[12:13] ; GFX1010-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 ; GFX1010-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX1010-NEXT: global_store_byte v2, v3, s[10:11] @@ -775,11 +780,13 @@ define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1030W32-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 ; GFX1030W32-NEXT: v_mov_b32_e32 v2, 0 ; GFX1030W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX1030W32-NEXT: s_add_u32 s6, s4, s6 -; GFX1030W32-NEXT: s_addc_u32 s7, s5, s7 -; GFX1030W32-NEXT: v_mov_b32_e32 v0, s6 -; GFX1030W32-NEXT: v_cmp_lt_u64_e64 s4, s[6:7], s[4:5] -; GFX1030W32-NEXT: v_mov_b32_e32 v1, s7 +; GFX1030W32-NEXT: s_add_u32 s4, s4, s6 +; GFX1030W32-NEXT: s_cselect_b32 s6, -1, 0 +; GFX1030W32-NEXT: v_mov_b32_e32 v0, s4 +; GFX1030W32-NEXT: s_cmp_lg_u32 s6, 0 +; GFX1030W32-NEXT: s_addc_u32 s5, s5, s7 +; GFX1030W32-NEXT: s_cselect_b32 s4, -1, 0 +; GFX1030W32-NEXT: v_mov_b32_e32 v1, s5 ; GFX1030W32-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX1030W32-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1030W32-NEXT: global_store_byte v2, v3, s[2:3] @@ -790,11 +797,13 @@ define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1030W64-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 ; GFX1030W64-NEXT: v_mov_b32_e32 v2, 0 ; GFX1030W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX1030W64-NEXT: s_add_u32 s6, s4, s6 -; GFX1030W64-NEXT: s_addc_u32 s7, s5, s7 -; GFX1030W64-NEXT: v_mov_b32_e32 v0, s6 -; GFX1030W64-NEXT: v_cmp_lt_u64_e64 s[4:5], s[6:7], s[4:5] -; GFX1030W64-NEXT: v_mov_b32_e32 v1, s7 +; GFX1030W64-NEXT: s_add_u32 s4, s4, s6 +; GFX1030W64-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GFX1030W64-NEXT: v_mov_b32_e32 v0, s4 +; GFX1030W64-NEXT: s_cmp_lg_u64 s[8:9], 0 +; GFX1030W64-NEXT: s_addc_u32 s5, s5, s7 +; GFX1030W64-NEXT: v_mov_b32_e32 v1, s5 +; GFX1030W64-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GFX1030W64-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] ; GFX1030W64-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1030W64-NEXT: global_store_byte v2, v3, s[2:3] @@ -804,12 +813,13 @@ define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_add_u32 s6, s4, s6 -; GFX11-NEXT: s_addc_u32 s7, s5, s7 -; GFX11-NEXT: v_mov_b32_e32 v0, s6 -; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[6:7], s[4:5] -; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s7 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: s_add_u32 s4, s4, s6 +; GFX11-NEXT: s_cselect_b32 s6, -1, 0 +; GFX11-NEXT: v_mov_b32_e32 v0, s4 +; GFX11-NEXT: s_cmp_lg_u32 s6, 0 +; GFX11-NEXT: s_addc_u32 s5, s5, s7 +; GFX11-NEXT: s_cselect_b32 s4, -1, 0 +; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s5 ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] @@ -819,12 +829,14 @@ define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1250-LABEL: suaddo64: ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24 -; GFX1250-NEXT: v_mov_b32_e32 v2, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: s_add_nc_u64 s[0:1], s[12:13], s[14:15] -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX1250-NEXT: v_cmp_lt_u64_e64 s0, s[0:1], s[12:13] +; GFX1250-NEXT: s_add_co_u32 s0, s12, s14 +; GFX1250-NEXT: s_cselect_b32 s1, -1, 0 +; GFX1250-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v0, s0 +; GFX1250-NEXT: s_cmp_lg_u32 s1, 0 +; GFX1250-NEXT: s_add_co_ci_u32 s1, s13, s15 +; GFX1250-NEXT: s_cselect_b32 s0, -1, 0 +; GFX1250-NEXT: v_mov_b32_e32 v1, s1 ; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 ; GFX1250-NEXT: s_clause 0x1 ; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[8:9] @@ -841,7 +853,8 @@ define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GCN-ISEL-LABEL: name: vuaddo64 ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.0 -; GCN-ISEL: V_ADD_U64_PSEUDO +; GCN-ISEL: V_ADD_CO_U32_e64 +; GCN-ISEL: V_ADDC_U32_e64 define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a) #0 { ; CISI-LABEL: vuaddo64: @@ -854,9 +867,8 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; CISI-NEXT: s_mov_b32 s4, s0 ; CISI-NEXT: v_mov_b32_e32 v1, s9 ; CISI-NEXT: v_add_i32_e32 v0, vcc, s8, v0 -; CISI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; CISI-NEXT: v_cmp_gt_u64_e32 vcc, s[8:9], v[0:1] ; CISI-NEXT: s_mov_b32 s5, s1 +; CISI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CISI-NEXT: s_mov_b32 s0, s2 ; CISI-NEXT: s_mov_b32 s1, s3 ; CISI-NEXT: s_mov_b32 s2, s6 @@ -876,7 +888,6 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; VI-NEXT: v_mov_b32_e32 v6, s5 ; VI-NEXT: v_add_u32_e32 v5, vcc, s4, v0 ; VI-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; VI-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[5:6] ; VI-NEXT: v_mov_b32_e32 v2, s1 ; VI-NEXT: v_mov_b32_e32 v3, s2 ; VI-NEXT: v_mov_b32_e32 v4, s3 @@ -894,7 +905,6 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1] ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX9-NEXT: global_store_byte v2, v0, s[2:3] @@ -909,8 +919,7 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1010-NEXT: s_waitcnt lgkmcnt(0) ; GFX1010-NEXT: v_add_co_u32 v0, s4, s6, v0 ; GFX1010-NEXT: v_add_co_ci_u32_e64 v1, s4, s7, 0, s4 -; GFX1010-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[6:7], v[0:1] -; GFX1010-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX1010-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1010-NEXT: global_store_byte v2, v3, s[2:3] ; GFX1010-NEXT: s_endpgm @@ -923,9 +932,8 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1030W32-NEXT: v_mov_b32_e32 v2, 0 ; GFX1030W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030W32-NEXT: v_add_co_u32 v0, s4, s6, v0 -; GFX1030W32-NEXT: v_add_co_ci_u32_e64 v1, null, s7, 0, s4 -; GFX1030W32-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[6:7], v[0:1] -; GFX1030W32-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo +; GFX1030W32-NEXT: v_add_co_ci_u32_e64 v1, s4, s7, 0, s4 +; GFX1030W32-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX1030W32-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1030W32-NEXT: global_store_byte v2, v3, s[2:3] ; GFX1030W32-NEXT: s_endpgm @@ -938,9 +946,8 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1030W64-NEXT: v_mov_b32_e32 v2, 0 ; GFX1030W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030W64-NEXT: v_add_co_u32 v0, s[4:5], s6, v0 -; GFX1030W64-NEXT: v_add_co_ci_u32_e64 v1, null, s7, 0, s[4:5] -; GFX1030W64-NEXT: v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1] -; GFX1030W64-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GFX1030W64-NEXT: v_add_co_ci_u32_e64 v1, s[4:5], s7, 0, s[4:5] +; GFX1030W64-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] ; GFX1030W64-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1030W64-NEXT: global_store_byte v2, v3, s[2:3] ; GFX1030W64-NEXT: s_endpgm @@ -955,10 +962,9 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_co_u32 v0, s4, s6, v0 -; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s7, 0, s4 +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, s4, s7, 0, s4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[6:7], v[0:1] -; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: global_store_b8 v2, v3, s[2:3] @@ -969,16 +975,17 @@ define amdgpu_kernel void @vuaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1250-NEXT: s_clause 0x1 ; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34 ; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1250-NEXT: v_mov_b32_e32 v1, 0 ; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-NEXT: v_add_nc_u64_e32 v[2:3], s[6:7], v[0:1] -; GFX1250-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[6:7], v[2:3] -; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_add_co_u32 v0, s4, s6, v0 +; GFX1250-NEXT: v_add_co_ci_u32_e64 v1, s4, s7, 0, s4 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX1250-NEXT: s_clause 0x1 -; GFX1250-NEXT: global_store_b64 v1, v[2:3], s[0:1] -; GFX1250-NEXT: global_store_b8 v1, v0, s[2:3] +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: global_store_b8 v2, v3, s[2:3] ; GFX1250-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -1671,7 +1678,8 @@ define amdgpu_kernel void @usubo32_vcc_user(ptr addrspace(1) %out, ptr addrspace ; GCN-ISEL-LABEL: name: susubo64 ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.0 -; GCN-ISEL: S_SUB_U64_PSEUDO +; GCN-ISEL: S_USUBO_PSEUDO +; GCN-ISEL: S_SUB_CO_PSEUDO define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) #0 { ; CISI-LABEL: susubo64: @@ -1680,21 +1688,23 @@ define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; CISI-NEXT: s_mov_b32 s11, 0xf000 ; CISI-NEXT: s_mov_b32 s10, -1 ; CISI-NEXT: s_waitcnt lgkmcnt(0) -; CISI-NEXT: s_sub_u32 s6, s4, s6 -; CISI-NEXT: v_mov_b32_e32 v0, s4 -; CISI-NEXT: s_subb_u32 s7, s5, s7 -; CISI-NEXT: v_mov_b32_e32 v1, s5 -; CISI-NEXT: v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1] -; CISI-NEXT: v_mov_b32_e32 v2, s6 +; CISI-NEXT: s_sub_u32 s4, s4, s6 +; CISI-NEXT: s_cselect_b64 s[12:13], -1, 0 +; CISI-NEXT: s_or_b32 s6, s12, s13 +; CISI-NEXT: s_cmp_lg_u32 s6, 0 +; CISI-NEXT: s_subb_u32 s5, s5, s7 ; CISI-NEXT: s_mov_b32 s8, s0 ; CISI-NEXT: s_mov_b32 s9, s1 +; CISI-NEXT: v_mov_b32_e32 v0, s4 +; CISI-NEXT: v_mov_b32_e32 v1, s5 +; CISI-NEXT: s_cselect_b64 s[4:5], -1, 0 ; CISI-NEXT: s_mov_b32 s0, s2 ; CISI-NEXT: s_mov_b32 s1, s3 ; CISI-NEXT: s_mov_b32 s2, s10 ; CISI-NEXT: s_mov_b32 s3, s11 -; CISI-NEXT: v_mov_b32_e32 v3, s7 -; CISI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CISI-NEXT: buffer_store_dwordx2 v[2:3], off, s[8:11], 0 +; CISI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; CISI-NEXT: s_waitcnt expcnt(0) +; CISI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; CISI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; CISI-NEXT: s_endpgm ; @@ -1702,37 +1712,37 @@ define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: s_sub_u32 s2, s4, s6 ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: s_sub_u32 s0, s4, s6 -; VI-NEXT: v_mov_b32_e32 v4, s4 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_subb_u32 s1, s5, s7 -; VI-NEXT: v_mov_b32_e32 v5, s5 -; VI-NEXT: v_mov_b32_e32 v7, s1 -; VI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[4:5] -; VI-NEXT: v_mov_b32_e32 v6, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; VI-NEXT: s_cmp_lg_u64 s[0:1], 0 +; VI-NEXT: s_subb_u32 s0, s5, s7 +; VI-NEXT: v_mov_b32_e32 v4, s2 +; VI-NEXT: v_mov_b32_e32 v5, s0 +; VI-NEXT: s_cselect_b64 s[0:1], -1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[6:7] -; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-NEXT: flat_store_dwordx2 v[0:1], v[4:5] +; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; VI-NEXT: flat_store_byte v[2:3], v0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: susubo64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 -; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_sub_u32 s0, s12, s14 -; GFX9-NEXT: v_mov_b32_e32 v0, s12 -; GFX9-NEXT: v_mov_b32_e32 v1, s13 -; GFX9-NEXT: s_subb_u32 s1, s13, s15 -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1] -; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[8:9] -; GFX9-NEXT: global_store_byte v4, v0, s[10:11] +; GFX9-NEXT: s_sub_u32 s2, s12, s14 +; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 +; GFX9-NEXT: s_subb_u32 s0, s13, s15 +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1] +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX9-NEXT: global_store_byte v2, v3, s[10:11] ; GFX9-NEXT: s_endpgm ; ; GFX1010-LABEL: susubo64: @@ -1741,10 +1751,12 @@ define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1010-NEXT: v_mov_b32_e32 v2, 0 ; GFX1010-NEXT: s_waitcnt lgkmcnt(0) ; GFX1010-NEXT: s_sub_u32 s0, s12, s14 -; GFX1010-NEXT: s_subb_u32 s1, s13, s15 +; GFX1010-NEXT: s_cselect_b32 s1, -1, 0 ; GFX1010-NEXT: v_mov_b32_e32 v0, s0 +; GFX1010-NEXT: s_cmp_lg_u32 s1, 0 +; GFX1010-NEXT: s_subb_u32 s1, s13, s15 +; GFX1010-NEXT: s_cselect_b32 s0, -1, 0 ; GFX1010-NEXT: v_mov_b32_e32 v1, s1 -; GFX1010-NEXT: v_cmp_gt_u64_e64 s0, s[0:1], s[12:13] ; GFX1010-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 ; GFX1010-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX1010-NEXT: global_store_byte v2, v3, s[10:11] @@ -1755,11 +1767,13 @@ define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1030W32-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 ; GFX1030W32-NEXT: v_mov_b32_e32 v2, 0 ; GFX1030W32-NEXT: s_waitcnt lgkmcnt(0) -; GFX1030W32-NEXT: s_sub_u32 s6, s4, s6 -; GFX1030W32-NEXT: s_subb_u32 s7, s5, s7 -; GFX1030W32-NEXT: v_mov_b32_e32 v0, s6 -; GFX1030W32-NEXT: v_cmp_gt_u64_e64 s4, s[6:7], s[4:5] -; GFX1030W32-NEXT: v_mov_b32_e32 v1, s7 +; GFX1030W32-NEXT: s_sub_u32 s4, s4, s6 +; GFX1030W32-NEXT: s_cselect_b32 s6, -1, 0 +; GFX1030W32-NEXT: v_mov_b32_e32 v0, s4 +; GFX1030W32-NEXT: s_cmp_lg_u32 s6, 0 +; GFX1030W32-NEXT: s_subb_u32 s5, s5, s7 +; GFX1030W32-NEXT: s_cselect_b32 s4, -1, 0 +; GFX1030W32-NEXT: v_mov_b32_e32 v1, s5 ; GFX1030W32-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX1030W32-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1030W32-NEXT: global_store_byte v2, v3, s[2:3] @@ -1770,11 +1784,13 @@ define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1030W64-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 ; GFX1030W64-NEXT: v_mov_b32_e32 v2, 0 ; GFX1030W64-NEXT: s_waitcnt lgkmcnt(0) -; GFX1030W64-NEXT: s_sub_u32 s6, s4, s6 -; GFX1030W64-NEXT: s_subb_u32 s7, s5, s7 -; GFX1030W64-NEXT: v_mov_b32_e32 v0, s6 -; GFX1030W64-NEXT: v_cmp_gt_u64_e64 s[4:5], s[6:7], s[4:5] -; GFX1030W64-NEXT: v_mov_b32_e32 v1, s7 +; GFX1030W64-NEXT: s_sub_u32 s4, s4, s6 +; GFX1030W64-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GFX1030W64-NEXT: v_mov_b32_e32 v0, s4 +; GFX1030W64-NEXT: s_cmp_lg_u64 s[8:9], 0 +; GFX1030W64-NEXT: s_subb_u32 s5, s5, s7 +; GFX1030W64-NEXT: v_mov_b32_e32 v1, s5 +; GFX1030W64-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GFX1030W64-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] ; GFX1030W64-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1030W64-NEXT: global_store_byte v2, v3, s[2:3] @@ -1784,12 +1800,13 @@ define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_sub_u32 s6, s4, s6 -; GFX11-NEXT: s_subb_u32 s7, s5, s7 -; GFX11-NEXT: v_mov_b32_e32 v0, s6 -; GFX11-NEXT: v_cmp_gt_u64_e64 s4, s[6:7], s[4:5] -; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s7 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: s_sub_u32 s4, s4, s6 +; GFX11-NEXT: s_cselect_b32 s6, -1, 0 +; GFX11-NEXT: v_mov_b32_e32 v0, s4 +; GFX11-NEXT: s_cmp_lg_u32 s6, 0 +; GFX11-NEXT: s_subb_u32 s5, s5, s7 +; GFX11-NEXT: s_cselect_b32 s4, -1, 0 +; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s5 ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] @@ -1799,12 +1816,14 @@ define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1250-LABEL: susubo64: ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x24 -; GFX1250-NEXT: v_mov_b32_e32 v2, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: s_sub_nc_u64 s[0:1], s[12:13], s[14:15] -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX1250-NEXT: v_cmp_gt_u64_e64 s0, s[0:1], s[12:13] +; GFX1250-NEXT: s_sub_co_u32 s0, s12, s14 +; GFX1250-NEXT: s_cselect_b32 s1, -1, 0 +; GFX1250-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v0, s0 +; GFX1250-NEXT: s_cmp_lg_u32 s1, 0 +; GFX1250-NEXT: s_sub_co_ci_u32 s1, s13, s15 +; GFX1250-NEXT: s_cselect_b32 s0, -1, 0 +; GFX1250-NEXT: v_mov_b32_e32 v1, s1 ; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 ; GFX1250-NEXT: s_clause 0x1 ; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[8:9] @@ -1821,7 +1840,8 @@ define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GCN-ISEL-LABEL: name: vusubo64 ; GCN-ISEL-LABEL: body: ; GCN-ISEL-LABEL: bb.0 -; GCN-ISEL: V_SUB_U64_PSEUDO +; GCN-ISEL: V_SUB_CO_U32_e64 +; GCN-ISEL: V_SUBB_U32_e64 define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a) #0 { ; CISI-LABEL: vusubo64: @@ -1834,9 +1854,8 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; CISI-NEXT: s_mov_b32 s4, s0 ; CISI-NEXT: v_mov_b32_e32 v1, s9 ; CISI-NEXT: v_sub_i32_e32 v0, vcc, s8, v0 -; CISI-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CISI-NEXT: v_cmp_lt_u64_e32 vcc, s[8:9], v[0:1] ; CISI-NEXT: s_mov_b32 s5, s1 +; CISI-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CISI-NEXT: s_mov_b32 s0, s2 ; CISI-NEXT: s_mov_b32 s1, s3 ; CISI-NEXT: s_mov_b32 s2, s6 @@ -1856,7 +1875,6 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; VI-NEXT: v_mov_b32_e32 v6, s5 ; VI-NEXT: v_sub_u32_e32 v5, vcc, s4, v0 ; VI-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc -; VI-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[5:6] ; VI-NEXT: v_mov_b32_e32 v2, s1 ; VI-NEXT: v_mov_b32_e32 v3, s2 ; VI-NEXT: v_mov_b32_e32 v4, s3 @@ -1874,7 +1892,6 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX9-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s6, v0 ; GFX9-NEXT: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1] ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX9-NEXT: global_store_byte v2, v0, s[2:3] @@ -1889,8 +1906,7 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1010-NEXT: s_waitcnt lgkmcnt(0) ; GFX1010-NEXT: v_sub_co_u32 v0, s4, s6, v0 ; GFX1010-NEXT: v_sub_co_ci_u32_e64 v1, s4, s7, 0, s4 -; GFX1010-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[6:7], v[0:1] -; GFX1010-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo +; GFX1010-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX1010-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1010-NEXT: global_store_byte v2, v3, s[2:3] ; GFX1010-NEXT: s_endpgm @@ -1903,9 +1919,8 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1030W32-NEXT: v_mov_b32_e32 v2, 0 ; GFX1030W32-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030W32-NEXT: v_sub_co_u32 v0, s4, s6, v0 -; GFX1030W32-NEXT: v_sub_co_ci_u32_e64 v1, null, s7, 0, s4 -; GFX1030W32-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[6:7], v[0:1] -; GFX1030W32-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo +; GFX1030W32-NEXT: v_sub_co_ci_u32_e64 v1, s4, s7, 0, s4 +; GFX1030W32-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX1030W32-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1030W32-NEXT: global_store_byte v2, v3, s[2:3] ; GFX1030W32-NEXT: s_endpgm @@ -1918,9 +1933,8 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1030W64-NEXT: v_mov_b32_e32 v2, 0 ; GFX1030W64-NEXT: s_waitcnt lgkmcnt(0) ; GFX1030W64-NEXT: v_sub_co_u32 v0, s[4:5], s6, v0 -; GFX1030W64-NEXT: v_sub_co_ci_u32_e64 v1, null, s7, 0, s[4:5] -; GFX1030W64-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1] -; GFX1030W64-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GFX1030W64-NEXT: v_sub_co_ci_u32_e64 v1, s[4:5], s7, 0, s[4:5] +; GFX1030W64-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] ; GFX1030W64-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX1030W64-NEXT: global_store_byte v2, v3, s[2:3] ; GFX1030W64-NEXT: s_endpgm @@ -1935,10 +1949,9 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_sub_co_u32 v0, s4, s6, v0 -; GFX11-NEXT: v_sub_co_ci_u32_e64 v1, null, s7, 0, s4 +; GFX11-NEXT: v_sub_co_ci_u32_e64 v1, s4, s7, 0, s4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[6:7], v[0:1] -; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: global_store_b8 v2, v3, s[2:3] @@ -1949,16 +1962,17 @@ define amdgpu_kernel void @vusubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1250-NEXT: s_clause 0x1 ; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x34 ; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1250-NEXT: v_mov_b32_e32 v1, 0 ; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1250-NEXT: v_sub_nc_u64_e32 v[2:3], s[6:7], v[0:1] -; GFX1250-NEXT: v_cmp_lt_u64_e32 vcc_lo, s[6:7], v[2:3] -; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: v_sub_co_u32 v0, s4, s6, v0 +; GFX1250-NEXT: v_sub_co_ci_u32_e64 v1, s4, s7, 0, s4 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX1250-NEXT: s_clause 0x1 -; GFX1250-NEXT: global_store_b64 v1, v[2:3], s[0:1] -; GFX1250-NEXT: global_store_b8 v1, v0, s[2:3] +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: global_store_b8 v2, v3, s[2:3] ; GFX1250-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/llvm/test/CodeGen/AMDGPU/fsub.ll b/llvm/test/CodeGen/AMDGPU/fsub.ll index 743431c..d6a9cb1 100644 --- a/llvm/test/CodeGen/AMDGPU/fsub.ll +++ b/llvm/test/CodeGen/AMDGPU/fsub.ll @@ -92,43 +92,11 @@ define amdgpu_kernel void @v_fneg_fsub_nsz_f32(ptr addrspace(1) %out, ptr addrsp ret void } -; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32: -; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} -; SI-NOT: xor -define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { - %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1 - %a = load float, ptr addrspace(1) %in, align 4 - %b = load float, ptr addrspace(1) %b_ptr, align 4 - %result = fsub float %a, %b - %neg.result = fsub float -0.0, %result - store float %neg.result, ptr addrspace(1) %out, align 4 - ret void -} - -; For some reason the attribute has a string "true" or "false", so -; make sure it is disabled and the fneg is not folded if it is not -; "true". -; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32: -; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} -; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]] -define amdgpu_kernel void @v_fneg_fsub_nsz_false_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { - %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1 - %a = load float, ptr addrspace(1) %in, align 4 - %b = load float, ptr addrspace(1) %b_ptr, align 4 - %result = fsub float %a, %b - %neg.result = fsub float -0.0, %result - store float %neg.result, ptr addrspace(1) %out, align 4 - ret void -} - -; FUNC-LABEL: {{^}}v_fsub_0_nsz_attribute_f32: +; FUNC-LABEL: {{^}}v_fsub_0_nsz_flag_f32: ; SI-NOT: v_sub -define amdgpu_kernel void @v_fsub_0_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +define amdgpu_kernel void @v_fsub_0_nsz_flag_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) { %a = load float, ptr addrspace(1) %in, align 4 - %result = fsub float %a, 0.0 + %result = fsub nsz float %a, 0.0 store float %result, ptr addrspace(1) %out, align 4 ret void } - -attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" } -attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/mad_int24.ll b/llvm/test/CodeGen/AMDGPU/mad_int24.ll index 93fda94..dd88310 100644 --- a/llvm/test/CodeGen/AMDGPU/mad_int24.ll +++ b/llvm/test/CodeGen/AMDGPU/mad_int24.ll @@ -1,17 +1,79 @@ -; RUN: llc < %s -mtriple=amdgcn | FileCheck %s --check-prefix=GCN --check-prefix=FUNC -; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefix=GCN --check-prefix=FUNC -; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=amdgcn| FileCheck %s --check-prefixes=GCN +; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefixes=VI +; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefixes=EG,R600,RW +; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefixes=EG,R600,CM -; FUNC-LABEL: {{^}}i32_mad24: ; Signed 24-bit multiply is not supported on pre-Cayman GPUs. -; EG: MULLO_INT -; CM: MULLO_INT -; GCN: s_bfe_i32 -; GCN: s_bfe_i32 -; GCN: s_mul_i32 -; GCN: s_add_i32 define amdgpu_kernel void @i32_mad24(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) { +; GCN-LABEL: i32_mad24: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb +; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_bfe_i32 s0, s0, 0x180000 +; GCN-NEXT: s_bfe_i32 s1, s1, 0x180000 +; GCN-NEXT: s_mul_i32 s0, s0, s1 +; GCN-NEXT: s_add_i32 s0, s0, s2 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GCN-NEXT: s_endpgm +; +; VI-LABEL: i32_mad24: +; VI: ; %bb.0: ; %entry +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_bfe_i32 s0, s0, 0x180000 +; VI-NEXT: s_bfe_i32 s1, s1, 0x180000 +; VI-NEXT: s_mul_i32 s0, s0, s1 +; VI-NEXT: s_add_i32 s0, s0, s2 +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; RW-LABEL: i32_mad24: +; RW: ; %bb.0: ; %entry +; RW-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[] +; RW-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; RW-NEXT: CF_END +; RW-NEXT: PAD +; RW-NEXT: ALU clause starting at 4: +; RW-NEXT: LSHL T0.W, KC0[2].Z, literal.x, +; RW-NEXT: LSHL * T1.W, KC0[2].W, literal.x, +; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; RW-NEXT: ASHR T1.W, PS, literal.x, +; RW-NEXT: ASHR * T0.W, PV.W, literal.x, +; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; RW-NEXT: MULLO_INT * T0.X, PS, PV.W, +; RW-NEXT: ADD_INT T0.X, PS, KC0[3].X, +; RW-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; RW-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; CM-LABEL: i32_mad24: +; CM: ; %bb.0: ; %entry +; CM-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: ALU clause starting at 4: +; CM-NEXT: LSHL T0.Z, KC0[2].Z, literal.x, +; CM-NEXT: LSHL * T0.W, KC0[2].W, literal.x, +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: ASHR T1.Z, PV.W, literal.x, +; CM-NEXT: ASHR * T0.W, PV.Z, literal.x, +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T0.X, T0.W, T1.Z, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T1.Z, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T1.Z, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T1.Z, +; CM-NEXT: ADD_INT * T0.X, PV.X, KC0[3].X, +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: %0 = shl i32 %a, 8 %a_24 = ashr i32 %0, 8 @@ -23,13 +85,25 @@ entry: ret void } -; GCN-LABEL: {{^}}mad24_known_bits_destroyed: -; GCN: s_waitcnt -; GCN-NEXT: v_mad_i32_i24 -; GCN-NEXT: v_mul_i32_i24 -; GCN-NEXT: s_setpc_b64 define i32 @mad24_known_bits_destroyed(i32 %a, i32 %b, i32 %c) { - +; GCN-LABEL: mad24_known_bits_destroyed: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mad_i32_i24 v1, v0, v1, v2 +; GCN-NEXT: v_mul_i32_i24_e32 v0, v1, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: mad24_known_bits_destroyed: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_mad_i32_i24 v1, v0, v1, v2 +; VI-NEXT: v_mul_i32_i24_e32 v0, v1, v0 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; EG-LABEL: mad24_known_bits_destroyed: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD %shl.0 = shl i32 %a, 8 %sra.0 = ashr i32 %shl.0, 8 %shl.1 = shl i32 %b, 8 @@ -48,12 +122,25 @@ define i32 @mad24_known_bits_destroyed(i32 %a, i32 %b, i32 %c) { ret i32 %mul1 } -; GCN-LABEL: {{^}}mad24_intrin_known_bits_destroyed: -; GCN: s_waitcnt -; GCN-NEXT: v_mad_i32_i24 -; GCN-NEXT: v_mul_i32_i24 -; GCN-NEXT: s_setpc_b64 define i32 @mad24_intrin_known_bits_destroyed(i32 %a, i32 %b, i32 %c) { +; GCN-LABEL: mad24_intrin_known_bits_destroyed: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mad_i32_i24 v1, v0, v1, v2 +; GCN-NEXT: v_mul_i32_i24_e32 v0, v1, v0 +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: mad24_intrin_known_bits_destroyed: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_mad_i32_i24 v1, v0, v1, v2 +; VI-NEXT: v_mul_i32_i24_e32 v0, v1, v0 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; EG-LABEL: mad24_intrin_known_bits_destroyed: +; EG: ; %bb.0: +; EG-NEXT: CF_END +; EG-NEXT: PAD %shl.0 = shl i32 %a, 8 %sra.0 = ashr i32 %shl.0, 8 %shl.1 = shl i32 %b, 8 @@ -73,17 +160,177 @@ define i32 @mad24_intrin_known_bits_destroyed(i32 %a, i32 %b, i32 %c) { } ; Make sure no unnecessary BFEs are emitted in the loop. -; GCN-LABEL: {{^}}mad24_destroyed_knownbits_2: -; GCN-NOT: v_bfe -; GCN: v_mad_i32_i24 -; GCN-NOT: v_bfe -; GCN: v_mad_i32_i24 -; GCN-NOT: v_bfe -; GCN: v_mad_i32_i24 -; GCN-NOT: v_bfe -; GCN: v_mad_i32_i24 -; GCN-NOT: v_bfe define void @mad24_destroyed_knownbits_2(i32 %arg, i32 %arg1, i32 %arg2, ptr addrspace(1) %arg3) { +; GCN-LABEL: mad24_destroyed_knownbits_2: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v5, 1 +; GCN-NEXT: s_mov_b64 s[4:5], 0 +; GCN-NEXT: .LBB3_1: ; %bb6 +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_mad_i32_i24 v0, v0, v5, v5 +; GCN-NEXT: v_add_i32_e32 v1, vcc, -1, v1 +; GCN-NEXT: v_mad_i32_i24 v5, v0, v5, v0 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GCN-NEXT: v_mad_i32_i24 v0, v5, v0, v5 +; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GCN-NEXT: v_mad_i32_i24 v0, v0, v5, v0 +; GCN-NEXT: v_mov_b32_e32 v5, v2 +; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GCN-NEXT: s_cbranch_execnz .LBB3_1 +; GCN-NEXT: ; %bb.2: ; %bb5 +; GCN-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-NEXT: s_mov_b32 s6, 0 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s6 +; GCN-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64 +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: mad24_destroyed_knownbits_2: +; VI: ; %bb.0: ; %bb +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v5, 1 +; VI-NEXT: s_mov_b64 s[4:5], 0 +; VI-NEXT: .LBB3_1: ; %bb6 +; VI-NEXT: ; =>This Inner Loop Header: Depth=1 +; VI-NEXT: v_mad_i32_i24 v0, v0, v5, v5 +; VI-NEXT: v_mad_i32_i24 v5, v0, v5, v0 +; VI-NEXT: v_add_u32_e32 v1, vcc, -1, v1 +; VI-NEXT: v_mad_i32_i24 v0, v5, v0, v5 +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; VI-NEXT: v_mad_i32_i24 v0, v0, v5, v0 +; VI-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; VI-NEXT: v_mov_b32_e32 v5, v2 +; VI-NEXT: s_andn2_b64 exec, exec, s[4:5] +; VI-NEXT: s_cbranch_execnz .LBB3_1 +; VI-NEXT: ; %bb.2: ; %bb5 +; VI-NEXT: s_or_b64 exec, exec, s[4:5] +; VI-NEXT: flat_store_dword v[3:4], v0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: s_setpc_b64 s[30:31] +; +; RW-LABEL: mad24_destroyed_knownbits_2: +; RW: ; %bb.0: ; %bb +; RW-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[] +; RW-NEXT: LOOP_START_DX10 @7 +; RW-NEXT: ALU_PUSH_BEFORE 30, @16, KC0[], KC1[] +; RW-NEXT: JUMP @6 POP:1 +; RW-NEXT: LOOP_BREAK @6 +; RW-NEXT: POP @6 POP:1 +; RW-NEXT: END_LOOP @2 +; RW-NEXT: ALU 1, @47, KC0[], KC1[] +; RW-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; RW-NEXT: CF_END +; RW-NEXT: ALU clause starting at 10: +; RW-NEXT: MOV T0.X, KC0[2].Y, +; RW-NEXT: MOV T0.Y, KC0[2].Z, +; RW-NEXT: MOV * T0.Z, KC0[2].W, +; RW-NEXT: MOV T0.W, KC0[3].X, +; RW-NEXT: MOV * T1.W, literal.x, +; RW-NEXT: 1(1.401298e-45), 0(0.000000e+00) +; RW-NEXT: ALU clause starting at 16: +; RW-NEXT: LSHL T2.W, T1.W, literal.x, +; RW-NEXT: LSHL * T3.W, T0.X, literal.x, +; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; RW-NEXT: ASHR T3.W, PS, literal.x, +; RW-NEXT: ASHR * T2.W, PV.W, literal.x, +; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; RW-NEXT: MULLO_INT * T0.X, PV.W, PS, +; RW-NEXT: ADD_INT * T1.W, PS, T1.W, +; RW-NEXT: LSHL * T3.W, PV.W, literal.x, +; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; RW-NEXT: ASHR * T3.W, PV.W, literal.x, +; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; RW-NEXT: MULLO_INT * T0.X, PV.W, T2.W, +; RW-NEXT: ADD_INT * T1.W, PS, T1.W, +; RW-NEXT: LSHL * T2.W, PV.W, literal.x, +; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; RW-NEXT: ASHR * T2.W, PV.W, literal.x, +; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; RW-NEXT: MULLO_INT * T0.X, PV.W, T3.W, +; RW-NEXT: ADD_INT * T1.W, PS, T1.W, +; RW-NEXT: LSHL * T3.W, PV.W, literal.x, +; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; RW-NEXT: ASHR * T3.W, PV.W, literal.x, +; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; RW-NEXT: ADD_INT T0.Y, T0.Y, literal.x, +; RW-NEXT: MULLO_INT * T0.X, PV.W, T2.W, +; RW-NEXT: -1(nan), 0(0.000000e+00) +; RW-NEXT: ADD_INT T0.X, PS, T1.W, +; RW-NEXT: SETE_INT T2.W, PV.Y, 0.0, +; RW-NEXT: MOV * T1.W, T0.Z, +; RW-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0, +; RW-NEXT: ALU clause starting at 47: +; RW-NEXT: LSHR * T1.X, T0.W, literal.x, +; RW-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; CM-LABEL: mad24_destroyed_knownbits_2: +; CM: ; %bb.0: ; %bb +; CM-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[] +; CM-NEXT: LOOP_START_DX10 @7 +; CM-NEXT: ALU_PUSH_BEFORE 41, @16, KC0[], KC1[] +; CM-NEXT: JUMP @6 POP:1 +; CM-NEXT: LOOP_BREAK @6 +; CM-NEXT: POP @6 POP:1 +; CM-NEXT: END_LOOP @2 +; CM-NEXT: ALU 1, @58, KC0[], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X +; CM-NEXT: CF_END +; CM-NEXT: ALU clause starting at 10: +; CM-NEXT: MOV * T1.X, KC0[2].Y, +; CM-NEXT: MOV T0.X, KC0[2].Z, +; CM-NEXT: MOV T0.Y, KC0[2].W, +; CM-NEXT: MOV T0.Z, KC0[3].X, +; CM-NEXT: MOV * T0.W, literal.x, +; CM-NEXT: 1(1.401298e-45), 0(0.000000e+00) +; CM-NEXT: ALU clause starting at 16: +; CM-NEXT: LSHL T1.Z, T0.W, literal.x, +; CM-NEXT: LSHL * T1.W, T1.X, literal.x, +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: ASHR T2.Z, PV.W, literal.x, +; CM-NEXT: ASHR * T1.W, PV.Z, literal.x, +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T1.X, T2.Z, T1.W, +; CM-NEXT: MULLO_INT T1.Y (MASKED), T2.Z, T1.W, +; CM-NEXT: MULLO_INT T1.Z (MASKED), T2.Z, T1.W, +; CM-NEXT: MULLO_INT * T1.W (MASKED), T2.Z, T1.W, +; CM-NEXT: ADD_INT * T0.W, PV.X, T0.W, +; CM-NEXT: LSHL * T2.W, PV.W, literal.x, +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: ASHR * T2.W, PV.W, literal.x, +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T1.X, T2.W, T1.W, +; CM-NEXT: MULLO_INT T1.Y (MASKED), T2.W, T1.W, +; CM-NEXT: MULLO_INT T1.Z (MASKED), T2.W, T1.W, +; CM-NEXT: MULLO_INT * T1.W (MASKED), T2.W, T1.W, +; CM-NEXT: ADD_INT * T0.W, PV.X, T0.W, +; CM-NEXT: LSHL * T1.W, PV.W, literal.x, +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: ASHR * T1.W, PV.W, literal.x, +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T1.X, T1.W, T2.W, +; CM-NEXT: MULLO_INT T1.Y (MASKED), T1.W, T2.W, +; CM-NEXT: MULLO_INT T1.Z (MASKED), T1.W, T2.W, +; CM-NEXT: MULLO_INT * T1.W (MASKED), T1.W, T2.W, +; CM-NEXT: ADD_INT * T0.W, PV.X, T0.W, +; CM-NEXT: LSHL * T2.W, PV.W, literal.x, +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: ADD_INT T0.X, T0.X, literal.x, +; CM-NEXT: ASHR * T2.W, PV.W, literal.y, +; CM-NEXT: -1(nan), 8(1.121039e-44) +; CM-NEXT: MULLO_INT T1.X, T2.W, T1.W, +; CM-NEXT: MULLO_INT T1.Y (MASKED), T2.W, T1.W, +; CM-NEXT: MULLO_INT T1.Z (MASKED), T2.W, T1.W, +; CM-NEXT: MULLO_INT * T1.W (MASKED), T2.W, T1.W, +; CM-NEXT: ADD_INT T1.X, PV.X, T0.W, +; CM-NEXT: SETE_INT T1.Z, T0.X, 0.0, +; CM-NEXT: MOV * T0.W, T0.Y, +; CM-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.Z, 0.0, +; CM-NEXT: ALU clause starting at 58: +; CM-NEXT: LSHR * T0.X, T0.Z, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) bb: br label %bb6 @@ -119,3 +366,5 @@ bb6: ; preds = %bb6, %bb } declare i32 @llvm.amdgcn.mul.i24(i32, i32) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; R600: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll index a6d458e..46b8df4 100644 --- a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll +++ b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll @@ -1,19 +1,75 @@ -; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC -; RUN: llc < %s -mtriple=amdgcn | FileCheck %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN -; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2 -; RUN: llc < %s -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefixes=EG +; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefixes=CM +; RUN: llc < %s -mtriple=amdgcn | FileCheck %s --check-prefixes=GCN +; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefixes=GFX8,SI +; RUN: llc < %s -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global | FileCheck %s --check-prefixes=GFX8,VI declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone -; FUNC-LABEL: {{^}}u32_mad24: -; EG: MULLO_INT -; SI: s_mul_i32 -; SI: s_add_i32 -; VI: s_mul_{{[iu]}}32 -; VI: s_add_{{[iu]}}32 - define amdgpu_kernel void @u32_mad24(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) { +; EG-LABEL: u32_mad24: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: ALU clause starting at 4: +; EG-NEXT: AND_INT T0.W, KC0[2].W, literal.x, +; EG-NEXT: AND_INT * T1.W, KC0[2].Z, literal.x, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: MULLO_INT * T0.X, PS, PV.W, +; EG-NEXT: ADD_INT T0.X, PS, KC0[3].X, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; CM-LABEL: u32_mad24: +; CM: ; %bb.0: ; %entry +; CM-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: ALU clause starting at 4: +; CM-NEXT: AND_INT T0.Z, KC0[2].W, literal.x, +; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x, +; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T0.X, T0.W, T0.Z, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T0.Z, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T0.Z, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T0.Z, +; CM-NEXT: ADD_INT * T0.X, PV.X, KC0[3].X, +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-LABEL: u32_mad24: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb +; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_and_b32 s0, s0, 0xffffff +; GCN-NEXT: s_and_b32 s1, s1, 0xffffff +; GCN-NEXT: s_mul_i32 s0, s0, s1 +; GCN-NEXT: s_add_i32 s0, s0, s2 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GCN-NEXT: s_endpgm +; +; GFX8-LABEL: u32_mad24: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; GFX8-NEXT: s_mov_b32 s7, 0xf000 +; GFX8-NEXT: s_mov_b32 s6, -1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_and_b32 s0, s0, 0xffffff +; GFX8-NEXT: s_and_b32 s1, s1, 0xffffff +; GFX8-NEXT: s_mul_i32 s0, s0, s1 +; GFX8-NEXT: s_add_i32 s0, s0, s2 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX8-NEXT: s_endpgm entry: %0 = shl i32 %a, 8 %a_24 = lshr i32 %0, 8 @@ -25,18 +81,88 @@ entry: ret void } -; FUNC-LABEL: {{^}}i16_mad24: ; The order of A and B does not matter. -; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] -; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] ; The result must be sign-extended -; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x -; EG: 16 -; GCN: s_mul_i32 [[MUL:s[0-9]]], {{[s][0-9], [s][0-9]}} -; GCN: s_add_i32 [[MAD:s[0-9]]], [[MUL]], s{{[0-9]}} -; GCN: s_sext_i32_i16 [[EXT:s[0-9]]], [[MAD]] -; GCN: v_mov_b32_e32 v0, [[EXT]] define amdgpu_kernel void @i16_mad24(ptr addrspace(1) %out, i16 %a, i16 %b, i16 %c) { +; EG-LABEL: i16_mad24: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 0, @12, KC0[], KC1[] +; EG-NEXT: TEX 2 @6 +; EG-NEXT: ALU 4, @13, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_16 T1.X, T0.X, 40, #3 +; EG-NEXT: VTX_READ_16 T2.X, T0.X, 42, #3 +; EG-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3 +; EG-NEXT: ALU clause starting at 12: +; EG-NEXT: MOV * T0.X, 0.0, +; EG-NEXT: ALU clause starting at 13: +; EG-NEXT: MULLO_INT * T0.Y, T1.X, T2.X, +; EG-NEXT: ADD_INT * T0.W, PS, T0.X, +; EG-NEXT: BFE_INT T0.X, PV.W, 0.0, literal.x, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) +; +; CM-LABEL: i16_mad24: +; CM: ; %bb.0: ; %entry +; CM-NEXT: ALU 0, @12, KC0[], KC1[] +; CM-NEXT: TEX 2 @6 +; CM-NEXT: ALU 8, @13, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: Fetch clause starting at 6: +; CM-NEXT: VTX_READ_16 T1.X, T0.X, 40, #3 +; CM-NEXT: VTX_READ_16 T2.X, T0.X, 42, #3 +; CM-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3 +; CM-NEXT: ALU clause starting at 12: +; CM-NEXT: MOV * T0.X, 0.0, +; CM-NEXT: ALU clause starting at 13: +; CM-NEXT: MULLO_INT T0.X (MASKED), T1.X, T2.X, +; CM-NEXT: MULLO_INT T0.Y, T1.X, T2.X, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T1.X, T2.X, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T1.X, T2.X, +; CM-NEXT: ADD_INT * T0.W, PV.Y, T0.X, +; CM-NEXT: BFE_INT * T0.X, PV.W, 0.0, literal.x, +; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-LABEL: i16_mad24: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_load_dword s4, s[4:5], 0xb +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s2, s2, 16 +; GCN-NEXT: s_mul_i32 s2, s4, s2 +; GCN-NEXT: s_add_i32 s2, s2, s3 +; GCN-NEXT: s_sext_i32_i16 s2, s2 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GCN-NEXT: s_endpgm +; +; GFX8-LABEL: i16_mad24: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX8-NEXT: s_load_dword s8, s[4:5], 0x2c +; GFX8-NEXT: s_mov_b32 s7, 0xf000 +; GFX8-NEXT: s_mov_b32 s6, -1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s4, s0 +; GFX8-NEXT: s_lshr_b32 s0, s2, 16 +; GFX8-NEXT: s_mul_i32 s0, s8, s0 +; GFX8-NEXT: s_add_i32 s0, s0, s3 +; GFX8-NEXT: s_sext_i32_i16 s0, s0 +; GFX8-NEXT: s_mov_b32 s5, s1 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX8-NEXT: s_endpgm entry: %0 = mul i16 %a, %b %1 = add i16 %0, %c @@ -46,17 +172,85 @@ entry: } ; FIXME: Need to handle non-uniform case for function below (load without gep). -; FUNC-LABEL: {{^}}i8_mad24: -; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] -; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] ; The result must be sign-extended -; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x -; EG: 8 -; GCN: s_mul_i32 [[MUL:s[0-9]]], {{[s][0-9], [s][0-9]}} -; GCN: s_add_i32 [[MAD:s[0-9]]], [[MUL]], s{{[0-9]}} -; GCN: s_sext_i32_i8 [[EXT:s[0-9]]], [[MAD]] -; GCN: v_mov_b32_e32 v0, [[EXT]] define amdgpu_kernel void @i8_mad24(ptr addrspace(1) %out, i8 %a, i8 %b, i8 %c) { +; EG-LABEL: i8_mad24: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 0, @12, KC0[], KC1[] +; EG-NEXT: TEX 2 @6 +; EG-NEXT: ALU 4, @13, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_8 T1.X, T0.X, 40, #3 +; EG-NEXT: VTX_READ_8 T2.X, T0.X, 41, #3 +; EG-NEXT: VTX_READ_8 T0.X, T0.X, 42, #3 +; EG-NEXT: ALU clause starting at 12: +; EG-NEXT: MOV * T0.X, 0.0, +; EG-NEXT: ALU clause starting at 13: +; EG-NEXT: MULLO_INT * T0.Y, T1.X, T2.X, +; EG-NEXT: ADD_INT * T0.W, PS, T0.X, +; EG-NEXT: BFE_INT T0.X, PV.W, 0.0, literal.x, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, +; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45) +; +; CM-LABEL: i8_mad24: +; CM: ; %bb.0: ; %entry +; CM-NEXT: ALU 0, @12, KC0[], KC1[] +; CM-NEXT: TEX 2 @6 +; CM-NEXT: ALU 8, @13, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: Fetch clause starting at 6: +; CM-NEXT: VTX_READ_8 T1.X, T0.X, 40, #3 +; CM-NEXT: VTX_READ_8 T2.X, T0.X, 41, #3 +; CM-NEXT: VTX_READ_8 T0.X, T0.X, 42, #3 +; CM-NEXT: ALU clause starting at 12: +; CM-NEXT: MOV * T0.X, 0.0, +; CM-NEXT: ALU clause starting at 13: +; CM-NEXT: MULLO_INT T0.X (MASKED), T1.X, T2.X, +; CM-NEXT: MULLO_INT T0.Y, T1.X, T2.X, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T1.X, T2.X, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T1.X, T2.X, +; CM-NEXT: ADD_INT * T0.W, PV.Y, T0.X, +; CM-NEXT: BFE_INT * T0.X, PV.W, 0.0, literal.x, +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-LABEL: i8_mad24: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dword s2, s[4:5], 0xb +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshr_b32 s4, s2, 8 +; GCN-NEXT: s_lshr_b32 s5, s2, 16 +; GCN-NEXT: s_mul_i32 s2, s2, s4 +; GCN-NEXT: s_add_i32 s2, s2, s5 +; GCN-NEXT: s_sext_i32_i8 s4, s2 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm +; +; GFX8-LABEL: i8_mad24: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8-NEXT: s_mov_b32 s3, 0xf000 +; GFX8-NEXT: s_mov_b32 s2, -1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s4, s6, 8 +; GFX8-NEXT: s_lshr_b32 s5, s6, 16 +; GFX8-NEXT: s_mul_i32 s4, s6, s4 +; GFX8-NEXT: s_add_i32 s4, s4, s5 +; GFX8-NEXT: s_sext_i32_i8 s4, s4 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX8-NEXT: s_endpgm entry: %0 = mul i8 %a, %b %1 = add i8 %0, %c @@ -72,11 +266,75 @@ entry: ; 24-bit mad pattern wasn't being matched. ; Check that the select instruction is not deleted. -; FUNC-LABEL: {{^}}i24_i32_i32_mad: -; EG: CNDE_INT -; SI: s_cselect -; GCN2: s_cselect define amdgpu_kernel void @i24_i32_i32_mad(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %d) { +; EG-LABEL: i24_i32_i32_mad: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: ALU clause starting at 4: +; EG-NEXT: ASHR * T0.W, KC0[2].Z, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: CNDE_INT * T0.W, KC0[3].X, literal.x, PV.W, +; EG-NEXT: 34(4.764415e-44), 0(0.000000e+00) +; EG-NEXT: MULLO_INT * T0.X, PV.W, KC0[3].X, +; EG-NEXT: ADD_INT T0.X, PS, KC0[3].Y, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; CM-LABEL: i24_i32_i32_mad: +; CM: ; %bb.0: ; %entry +; CM-NEXT: ALU 10, @4, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: ALU clause starting at 4: +; CM-NEXT: ASHR * T0.W, KC0[2].Z, literal.x, +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: CNDE_INT * T0.W, KC0[3].X, literal.x, PV.W, +; CM-NEXT: 34(4.764415e-44), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T0.X, T0.W, KC0[3].X, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, KC0[3].X, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, KC0[3].X, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, KC0[3].X, +; CM-NEXT: ADD_INT * T0.X, PV.X, KC0[3].Y, +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-LABEL: i24_i32_i32_mad: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dword s2, s[4:5], 0xb +; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_ashr_i32 s2, s2, 8 +; GCN-NEXT: s_cmp_lg_u32 s6, 0 +; GCN-NEXT: s_cselect_b32 s2, s2, 34 +; GCN-NEXT: s_mul_i32 s2, s2, s6 +; GCN-NEXT: s_add_i32 s4, s2, s7 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm +; +; GFX8-LABEL: i24_i32_i32_mad: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_load_dword s8, s[4:5], 0x2c +; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8-NEXT: s_mov_b32 s3, 0xf000 +; GFX8-NEXT: s_mov_b32 s2, -1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_ashr_i32 s4, s8, 8 +; GFX8-NEXT: s_cmp_lg_u32 s6, 0 +; GFX8-NEXT: s_cselect_b32 s4, s4, 34 +; GFX8-NEXT: s_mul_i32 s4, s4, s6 +; GFX8-NEXT: s_add_i32 s4, s4, s7 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX8-NEXT: s_endpgm entry: %0 = ashr i32 %a, 8 %1 = icmp ne i32 %c, 0 @@ -87,13 +345,139 @@ entry: ret void } -; FUNC-LABEL: {{^}}extra_and: -; SI-NOT: v_and -; SI: s_mul_i32 -; SI: s_mul_i32 -; SI: s_add_i32 -; SI: s_add_i32 define amdgpu_kernel void @extra_and(ptr addrspace(1) %arg, i32 %arg2, i32 %arg3) { +; EG-LABEL: extra_and: +; EG: ; %bb.0: ; %bb +; EG-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[] +; EG-NEXT: LOOP_START_DX10 @7 +; EG-NEXT: ALU_PUSH_BEFORE 12, @16, KC0[], KC1[] +; EG-NEXT: JUMP @6 POP:1 +; EG-NEXT: LOOP_BREAK @6 +; EG-NEXT: POP @6 POP:1 +; EG-NEXT: END_LOOP @2 +; EG-NEXT: ALU 1, @29, KC0[], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: ALU clause starting at 10: +; EG-NEXT: MOV * T1.W, literal.x, +; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00) +; EG-NEXT: MOV * T3.W, PV.W, +; EG-NEXT: MOV T0.Z, KC0[2].Y, +; EG-NEXT: MOV T0.W, KC0[2].Z, +; EG-NEXT: MOV * T2.W, KC0[2].W, +; EG-NEXT: ALU clause starting at 16: +; EG-NEXT: AND_INT T1.W, T1.W, literal.x, +; EG-NEXT: AND_INT * T4.W, T3.W, literal.x, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: AND_INT T3.W, T3.W, literal.x, +; EG-NEXT: MULLO_INT * T0.X, PS, PV.W, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: MULLO_INT * T0.Y, PV.W, T1.W, +; EG-NEXT: ADD_INT T3.W, T2.W, PS, +; EG-NEXT: ADD_INT * T1.W, T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.X, PS, PV.W, +; EG-NEXT: SETNE_INT * T4.W, PV.X, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0, +; EG-NEXT: ALU clause starting at 29: +; EG-NEXT: LSHR * T1.X, T0.Z, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; CM-LABEL: extra_and: +; CM: ; %bb.0: ; %bb +; CM-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[] +; CM-NEXT: LOOP_START_DX10 @7 +; CM-NEXT: ALU_PUSH_BEFORE 17, @16, KC0[], KC1[] +; CM-NEXT: JUMP @6 POP:1 +; CM-NEXT: LOOP_BREAK @6 +; CM-NEXT: POP @6 POP:1 +; CM-NEXT: END_LOOP @2 +; CM-NEXT: ALU 1, @34, KC0[], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X +; CM-NEXT: CF_END +; CM-NEXT: ALU clause starting at 10: +; CM-NEXT: MOV * T0.W, literal.x, +; CM-NEXT: 0(0.000000e+00), 0(0.000000e+00) +; CM-NEXT: MOV * T1.Z, PV.W, +; CM-NEXT: MOV T0.Y, KC0[2].Y, +; CM-NEXT: MOV T0.Z, KC0[2].Z, +; CM-NEXT: MOV * T1.W, KC0[2].W, +; CM-NEXT: ALU clause starting at 16: +; CM-NEXT: AND_INT T1.Y, T1.Z, literal.x, +; CM-NEXT: AND_INT T2.Z, T0.W, literal.x, +; CM-NEXT: AND_INT * T0.W, T1.Z, literal.x, +; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T0.X, T0.W, T2.Z, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T2.Z, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T2.Z, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T2.Z, +; CM-NEXT: MULLO_INT T0.X (MASKED), T1.Y, T2.Z, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T1.Y, T2.Z, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T1.Y, T2.Z, +; CM-NEXT: MULLO_INT * T0.W, T1.Y, T2.Z, +; CM-NEXT: ADD_INT T1.Z, T1.W, PV.W, +; CM-NEXT: ADD_INT * T0.W, T0.Z, T0.X, +; CM-NEXT: ADD_INT * T0.X, PV.W, PV.Z, +; CM-NEXT: SETNE_INT * T2.W, PV.X, literal.x, +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0, +; CM-NEXT: ALU clause starting at 34: +; CM-NEXT: LSHR * T1.X, T0.Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-LABEL: extra_and: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb +; GCN-NEXT: s_mov_b32 s2, 0 +; GCN-NEXT: s_mov_b32 s6, 0 +; GCN-NEXT: .LBB4_1: ; %bb4 +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_and_b32 s3, s6, 0xffffff +; GCN-NEXT: s_and_b32 s6, s6, 0xffffff +; GCN-NEXT: s_and_b32 s2, s2, 0xffffff +; GCN-NEXT: s_mul_i32 s3, s3, s2 +; GCN-NEXT: s_mul_i32 s6, s6, s2 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_add_i32 s2, s0, s3 +; GCN-NEXT: s_add_i32 s6, s1, s6 +; GCN-NEXT: s_add_i32 s3, s2, s6 +; GCN-NEXT: s_cmp_lg_u32 s3, 8 +; GCN-NEXT: s_cbranch_scc1 .LBB4_1 +; GCN-NEXT: ; %bb.2: ; %bb18 +; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GCN-NEXT: s_endpgm +; +; GFX8-LABEL: extra_and: +; GFX8: ; %bb.0: ; %bb +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c +; GFX8-NEXT: s_mov_b32 s2, 0 +; GFX8-NEXT: s_mov_b32 s6, 0 +; GFX8-NEXT: .LBB4_1: ; %bb4 +; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX8-NEXT: s_and_b32 s3, s6, 0xffffff +; GFX8-NEXT: s_and_b32 s6, s6, 0xffffff +; GFX8-NEXT: s_and_b32 s2, s2, 0xffffff +; GFX8-NEXT: s_mul_i32 s3, s3, s2 +; GFX8-NEXT: s_mul_i32 s6, s6, s2 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_add_i32 s2, s0, s3 +; GFX8-NEXT: s_add_i32 s6, s1, s6 +; GFX8-NEXT: s_add_i32 s3, s2, s6 +; GFX8-NEXT: s_cmp_lg_u32 s3, 8 +; GFX8-NEXT: s_cbranch_scc1 .LBB4_1 +; GFX8-NEXT: ; %bb.2: ; %bb18 +; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; GFX8-NEXT: s_mov_b32 s7, 0xf000 +; GFX8-NEXT: s_mov_b32 s6, -1 +; GFX8-NEXT: v_mov_b32_e32 v0, s3 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX8-NEXT: s_endpgm bb: br label %bb4 @@ -119,13 +503,139 @@ bb18: ; preds = %bb4 ret void } -; FUNC-LABEL: {{^}}dont_remove_shift -; SI: s_lshr -; SI: s_mul_i32 -; SI: s_mul_i32 -; SI: s_add_i32 -; SI: s_add_i32 define amdgpu_kernel void @dont_remove_shift(ptr addrspace(1) %arg, i32 %arg2, i32 %arg3) { +; EG-LABEL: dont_remove_shift: +; EG: ; %bb.0: ; %bb +; EG-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[] +; EG-NEXT: LOOP_START_DX10 @7 +; EG-NEXT: ALU_PUSH_BEFORE 12, @16, KC0[], KC1[] +; EG-NEXT: JUMP @6 POP:1 +; EG-NEXT: LOOP_BREAK @6 +; EG-NEXT: POP @6 POP:1 +; EG-NEXT: END_LOOP @2 +; EG-NEXT: ALU 1, @29, KC0[], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: ALU clause starting at 10: +; EG-NEXT: MOV * T1.W, literal.x, +; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00) +; EG-NEXT: MOV * T3.W, PV.W, +; EG-NEXT: MOV T0.Z, KC0[2].Y, +; EG-NEXT: MOV T0.W, KC0[2].Z, +; EG-NEXT: MOV * T2.W, KC0[2].W, +; EG-NEXT: ALU clause starting at 16: +; EG-NEXT: LSHR T1.W, T1.W, literal.x, +; EG-NEXT: LSHR * T4.W, T3.W, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: LSHR T3.W, T3.W, literal.x, +; EG-NEXT: MULLO_INT * T0.X, PS, PV.W, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: MULLO_INT * T0.Y, PV.W, T1.W, +; EG-NEXT: ADD_INT T3.W, T2.W, PS, +; EG-NEXT: ADD_INT * T1.W, T0.W, T0.X, +; EG-NEXT: ADD_INT * T0.X, PS, PV.W, +; EG-NEXT: SETNE_INT * T4.W, PV.X, literal.x, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0, +; EG-NEXT: ALU clause starting at 29: +; EG-NEXT: LSHR * T1.X, T0.Z, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; CM-LABEL: dont_remove_shift: +; CM: ; %bb.0: ; %bb +; CM-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[] +; CM-NEXT: LOOP_START_DX10 @7 +; CM-NEXT: ALU_PUSH_BEFORE 17, @16, KC0[], KC1[] +; CM-NEXT: JUMP @6 POP:1 +; CM-NEXT: LOOP_BREAK @6 +; CM-NEXT: POP @6 POP:1 +; CM-NEXT: END_LOOP @2 +; CM-NEXT: ALU 1, @34, KC0[], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X +; CM-NEXT: CF_END +; CM-NEXT: ALU clause starting at 10: +; CM-NEXT: MOV * T0.W, literal.x, +; CM-NEXT: 0(0.000000e+00), 0(0.000000e+00) +; CM-NEXT: MOV * T1.Z, PV.W, +; CM-NEXT: MOV T0.Y, KC0[2].Y, +; CM-NEXT: MOV T0.Z, KC0[2].Z, +; CM-NEXT: MOV * T1.W, KC0[2].W, +; CM-NEXT: ALU clause starting at 16: +; CM-NEXT: LSHR T1.Y, T1.Z, literal.x, +; CM-NEXT: LSHR T2.Z, T0.W, literal.x, +; CM-NEXT: LSHR * T0.W, T1.Z, literal.x, +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T0.X, T0.W, T2.Z, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T2.Z, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T2.Z, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T2.Z, +; CM-NEXT: MULLO_INT T0.X (MASKED), T1.Y, T2.Z, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T1.Y, T2.Z, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T1.Y, T2.Z, +; CM-NEXT: MULLO_INT * T0.W, T1.Y, T2.Z, +; CM-NEXT: ADD_INT T1.Z, T1.W, PV.W, +; CM-NEXT: ADD_INT * T0.W, T0.Z, T0.X, +; CM-NEXT: ADD_INT * T0.X, PV.W, PV.Z, +; CM-NEXT: SETNE_INT * T2.W, PV.X, literal.x, +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0, +; CM-NEXT: ALU clause starting at 34: +; CM-NEXT: LSHR * T1.X, T0.Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-LABEL: dont_remove_shift: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb +; GCN-NEXT: s_mov_b32 s2, 0 +; GCN-NEXT: s_mov_b32 s6, 0 +; GCN-NEXT: .LBB5_1: ; %bb4 +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: s_lshr_b32 s3, s6, 8 +; GCN-NEXT: s_lshr_b32 s6, s6, 8 +; GCN-NEXT: s_lshr_b32 s2, s2, 8 +; GCN-NEXT: s_mul_i32 s3, s3, s2 +; GCN-NEXT: s_mul_i32 s6, s6, s2 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_add_i32 s2, s0, s3 +; GCN-NEXT: s_add_i32 s6, s1, s6 +; GCN-NEXT: s_add_i32 s3, s2, s6 +; GCN-NEXT: s_cmp_lg_u32 s3, 8 +; GCN-NEXT: s_cbranch_scc1 .LBB5_1 +; GCN-NEXT: ; %bb.2: ; %bb18 +; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GCN-NEXT: s_endpgm +; +; GFX8-LABEL: dont_remove_shift: +; GFX8: ; %bb.0: ; %bb +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c +; GFX8-NEXT: s_mov_b32 s2, 0 +; GFX8-NEXT: s_mov_b32 s6, 0 +; GFX8-NEXT: .LBB5_1: ; %bb4 +; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX8-NEXT: s_lshr_b32 s3, s6, 8 +; GFX8-NEXT: s_lshr_b32 s6, s6, 8 +; GFX8-NEXT: s_lshr_b32 s2, s2, 8 +; GFX8-NEXT: s_mul_i32 s3, s3, s2 +; GFX8-NEXT: s_mul_i32 s6, s6, s2 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_add_i32 s2, s0, s3 +; GFX8-NEXT: s_add_i32 s6, s1, s6 +; GFX8-NEXT: s_add_i32 s3, s2, s6 +; GFX8-NEXT: s_cmp_lg_u32 s3, 8 +; GFX8-NEXT: s_cbranch_scc1 .LBB5_1 +; GFX8-NEXT: ; %bb.2: ; %bb18 +; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; GFX8-NEXT: s_mov_b32 s7, 0xf000 +; GFX8-NEXT: s_mov_b32 s6, -1 +; GFX8-NEXT: v_mov_b32_e32 v0, s3 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX8-NEXT: s_endpgm bb: br label %bb4 @@ -151,19 +661,234 @@ bb18: ; preds = %bb4 ret void } -; FUNC-LABEL: {{^}}i8_mad_sat_16: -; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] -; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] -; The result must be sign-extended -; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x -; EG: 8 -; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} -; SI: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16 -; SI: v_med3_i32 v{{[0-9]}}, [[EXT]], -; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} -; VI: v_max_i16_e32 [[MAX:v[0-9]]], 0xff80, [[MAD]] -; VI: v_min_i16_e32 {{v[0-9]}}, 0x7f, [[MAX]] define amdgpu_kernel void @i8_mad_sat_16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(5) %idx) { +; EG-LABEL: i8_mad_sat_16: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @8 +; EG-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @10 +; EG-NEXT: ALU 24, @21, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 8: +; EG-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1 +; EG-NEXT: Fetch clause starting at 10: +; EG-NEXT: VTX_READ_8 T3.X, T3.X, 0, #1 +; EG-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1 +; EG-NEXT: ALU clause starting at 14: +; EG-NEXT: LSHR * T0.W, KC0[3].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: MOVA_INT * AR.x (MASKED), PV.W, +; EG-NEXT: MOV * T0.X, T(0 + AR.x).X+, +; EG-NEXT: ADD_INT * T1.X, KC0[2].W, PV.X, +; EG-NEXT: ALU clause starting at 19: +; EG-NEXT: ADD_INT T2.X, KC0[2].Z, T0.X, +; EG-NEXT: ADD_INT * T3.X, KC0[3].X, T0.X, +; EG-NEXT: ALU clause starting at 21: +; EG-NEXT: BFE_INT T0.Z, T1.X, 0.0, literal.x, +; EG-NEXT: BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: BFE_INT T1.W, T3.X, 0.0, literal.x, +; EG-NEXT: MULLO_INT * T0.Y, PV.Z, PV.W, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, PS, PV.W, +; EG-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x, +; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; EG-NEXT: MAX_INT T0.W, PV.W, literal.x, +; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, T0.X, +; EG-NEXT: -128(nan), 0(0.000000e+00) +; EG-NEXT: AND_INT T2.W, PS, literal.x, +; EG-NEXT: MIN_INT * T0.W, PV.W, literal.y, +; EG-NEXT: 3(4.203895e-45), 127(1.779649e-43) +; EG-NEXT: AND_INT T0.W, PS, literal.x, +; EG-NEXT: LSHL * T2.W, PV.W, literal.y, +; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45) +; EG-NEXT: LSHL T0.X, PV.W, PS, +; EG-NEXT: LSHL * T0.W, literal.x, PS, +; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) +; EG-NEXT: MOV T0.Y, 0.0, +; EG-NEXT: MOV * T0.Z, 0.0, +; EG-NEXT: LSHR * T1.X, T1.W, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; CM-LABEL: i8_mad_sat_16: +; CM: ; %bb.0: ; %entry +; CM-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[] +; CM-NEXT: TEX 0 @8 +; CM-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[] +; CM-NEXT: TEX 1 @10 +; CM-NEXT: ALU 26, @21, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT MSKOR T1.XW, T0.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: Fetch clause starting at 8: +; CM-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1 +; CM-NEXT: Fetch clause starting at 10: +; CM-NEXT: VTX_READ_8 T3.X, T3.X, 0, #1 +; CM-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1 +; CM-NEXT: ALU clause starting at 14: +; CM-NEXT: LSHR * T0.W, KC0[3].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; CM-NEXT: MOVA_INT * AR.x (MASKED), PV.W, +; CM-NEXT: MOV * T0.X, T(0 + AR.x).X+, +; CM-NEXT: ADD_INT * T1.X, KC0[3].X, PV.X, +; CM-NEXT: ALU clause starting at 19: +; CM-NEXT: ADD_INT * T2.X, KC0[2].W, T0.X, +; CM-NEXT: ADD_INT * T3.X, KC0[2].Z, T0.X, +; CM-NEXT: ALU clause starting at 21: +; CM-NEXT: BFE_INT T0.Y, T1.X, 0.0, literal.x, +; CM-NEXT: BFE_INT T0.Z, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212 +; CM-NEXT: BFE_INT * T0.W, T3.X, 0.0, literal.x, BS:VEC_201 +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T0.X (MASKED), T0.Z, T0.W, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.Z, T0.W, +; CM-NEXT: MULLO_INT T0.Z, T0.Z, T0.W, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.Z, T0.W, +; CM-NEXT: ADD_INT * T0.W, PV.Z, T0.Y, +; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x, +; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; CM-NEXT: MAX_INT T0.Z, PV.W, literal.x, +; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.X, +; CM-NEXT: -128(nan), 0(0.000000e+00) +; CM-NEXT: AND_INT T1.Z, PV.W, literal.x, +; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.y, +; CM-NEXT: 3(4.203895e-45), 127(1.779649e-43) +; CM-NEXT: AND_INT T0.Z, PV.W, literal.x, +; CM-NEXT: LSHL * T1.W, PV.Z, literal.y, +; CM-NEXT: 255(3.573311e-43), 3(4.203895e-45) +; CM-NEXT: LSHL T1.X, PV.Z, PV.W, +; CM-NEXT: LSHL * T1.W, literal.x, PV.W, +; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00) +; CM-NEXT: MOV T1.Y, 0.0, +; CM-NEXT: MOV * T1.Z, 0.0, +; CM-NEXT: LSHR * T0.X, T0.W, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-LABEL: i8_mad_sat_16: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s22, -1 +; GCN-NEXT: s_mov_b32 s23, 0xe8f000 +; GCN-NEXT: s_add_u32 s20, s20, s11 +; GCN-NEXT: s_addc_u32 s21, s21, 0 +; GCN-NEXT: s_load_dword s8, s[4:5], 0x11 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_add_i32 s9, s8, 4 +; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 +; GCN-NEXT: v_mov_b32_e32 v0, s8 +; GCN-NEXT: v_mov_b32_e32 v1, s9 +; GCN-NEXT: buffer_load_dword v1, v1, s[20:23], 0 offen +; GCN-NEXT: buffer_load_dword v0, v0, s[20:23], 0 offen +; GCN-NEXT: s_mov_b32 s11, 0xf000 +; GCN-NEXT: s_mov_b32 s10, 0 +; GCN-NEXT: s_mov_b64 s[14:15], s[10:11] +; GCN-NEXT: s_mov_b64 s[18:19], s[10:11] +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b64 s[8:9], s[2:3] +; GCN-NEXT: s_mov_b64 s[12:13], s[4:5] +; GCN-NEXT: s_mov_b64 s[16:17], s[6:7] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_load_sbyte v2, v[0:1], s[12:15], 0 addr64 +; GCN-NEXT: buffer_load_sbyte v3, v[0:1], s[8:11], 0 addr64 +; GCN-NEXT: buffer_load_sbyte v4, v[0:1], s[16:19], 0 addr64 +; GCN-NEXT: s_movk_i32 s2, 0xff80 +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v2 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: v_and_b32_e32 v3, 0xffff, v3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mad_u32_u24 v2, v2, v3, v4 +; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GCN-NEXT: v_mov_b32_e32 v3, 0x7f +; GCN-NEXT: v_med3_i32 v2, v2, s2, v3 +; GCN-NEXT: s_mov_b64 s[2:3], s[10:11] +; GCN-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 +; GCN-NEXT: s_endpgm +; +; SI-LABEL: i8_mad_sat_16: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 +; SI-NEXT: s_load_dword s0, s[4:5], 0x44 +; SI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 +; SI-NEXT: s_mov_b32 s90, -1 +; SI-NEXT: s_mov_b32 s91, 0xe80000 +; SI-NEXT: s_add_u32 s88, s88, s11 +; SI-NEXT: s_addc_u32 s89, s89, 0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_add_i32 s1, s0, 4 +; SI-NEXT: v_mov_b32_e32 v0, s0 +; SI-NEXT: buffer_load_dword v6, v0, s[88:91], 0 offen +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: buffer_load_dword v7, v0, s[88:91], 0 offen +; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v1, s3 +; SI-NEXT: v_mov_b32_e32 v3, s5 +; SI-NEXT: v_mov_b32_e32 v5, s7 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_add_u32_e32 v0, vcc, s2, v6 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc +; SI-NEXT: v_add_u32_e32 v2, vcc, s4, v6 +; SI-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; SI-NEXT: v_add_u32_e32 v4, vcc, s6, v6 +; SI-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc +; SI-NEXT: flat_load_sbyte v0, v[0:1] +; SI-NEXT: flat_load_sbyte v1, v[2:3] +; SI-NEXT: flat_load_sbyte v2, v[4:5] +; SI-NEXT: v_mov_b32_e32 v3, s1 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_mad_u16 v0, v1, v0, v2 +; SI-NEXT: v_max_i16_e32 v0, 0xff80, v0 +; SI-NEXT: v_min_i16_e32 v2, 0x7f, v0 +; SI-NEXT: v_add_u32_e32 v0, vcc, s0, v6 +; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v7, vcc +; SI-NEXT: flat_store_byte v[0:1], v2 +; SI-NEXT: s_endpgm +; +; VI-LABEL: i8_mad_sat_16: +; VI: ; %bb.0: ; %entry +; VI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_load_dword s0, s[4:5], 0x44 +; VI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s14, -1 +; VI-NEXT: s_mov_b32 s15, 0xe80000 +; VI-NEXT: s_add_u32 s12, s12, s11 +; VI-NEXT: s_addc_u32 s13, s13, 0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_i32 s1, s0, 4 +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: buffer_load_dword v6, v0, s[12:15], 0 offen +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: buffer_load_dword v7, v0, s[12:15], 0 offen +; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_mov_b32_e32 v3, s5 +; VI-NEXT: v_mov_b32_e32 v5, s7 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v6 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc +; VI-NEXT: v_add_u32_e32 v2, vcc, s4, v6 +; VI-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; VI-NEXT: v_add_u32_e32 v4, vcc, s6, v6 +; VI-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc +; VI-NEXT: flat_load_sbyte v0, v[0:1] +; VI-NEXT: flat_load_sbyte v1, v[2:3] +; VI-NEXT: flat_load_sbyte v2, v[4:5] +; VI-NEXT: v_mov_b32_e32 v3, s1 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_mad_u16 v0, v1, v0, v2 +; VI-NEXT: v_max_i16_e32 v0, 0xff80, v0 +; VI-NEXT: v_min_i16_e32 v2, 0x7f, v0 +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v6 +; VI-NEXT: v_addc_u32_e32 v1, vcc, v3, v7, vcc +; VI-NEXT: flat_store_byte v[0:1], v2 +; VI-NEXT: s_endpgm entry: %retval.0.i = load i64, ptr addrspace(5) %idx %arrayidx = getelementptr inbounds i8, ptr addrspace(1) %in0, i64 %retval.0.i @@ -187,16 +912,201 @@ entry: ret void } -; FUNC-LABEL: {{^}}i8_mad_32: -; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] -; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] -; The result must be sign-extended -; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x -; EG: 8 -; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} -; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} -; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16 define amdgpu_kernel void @i8_mad_32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(5) %idx) { +; EG-LABEL: i8_mad_32: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @8 +; EG-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @10 +; EG-NEXT: ALU 9, @21, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 8: +; EG-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1 +; EG-NEXT: Fetch clause starting at 10: +; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 +; EG-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1 +; EG-NEXT: ALU clause starting at 14: +; EG-NEXT: LSHR * T0.W, KC0[3].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: MOVA_INT * AR.x (MASKED), PV.W, +; EG-NEXT: MOV * T0.X, T(0 + AR.x).X+, +; EG-NEXT: ADD_INT * T1.X, KC0[2].W, PV.X, +; EG-NEXT: ALU clause starting at 19: +; EG-NEXT: ADD_INT T2.X, KC0[2].Z, T0.X, +; EG-NEXT: ADD_INT * T0.X, KC0[3].X, T0.X, +; EG-NEXT: ALU clause starting at 21: +; EG-NEXT: BFE_INT T0.Z, T1.X, 0.0, literal.x, +; EG-NEXT: BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: BFE_INT T1.W, T0.X, 0.0, literal.x, +; EG-NEXT: MULLO_INT * T0.X, PV.W, PV.Z, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, PS, PV.W, +; EG-NEXT: BFE_INT T0.X, PV.W, 0.0, literal.x, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) +; +; CM-LABEL: i8_mad_32: +; CM: ; %bb.0: ; %entry +; CM-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[] +; CM-NEXT: TEX 0 @8 +; CM-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[] +; CM-NEXT: TEX 1 @10 +; CM-NEXT: ALU 12, @21, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: Fetch clause starting at 8: +; CM-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1 +; CM-NEXT: Fetch clause starting at 10: +; CM-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 +; CM-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1 +; CM-NEXT: ALU clause starting at 14: +; CM-NEXT: LSHR * T0.W, KC0[3].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; CM-NEXT: MOVA_INT * AR.x (MASKED), PV.W, +; CM-NEXT: MOV * T0.X, T(0 + AR.x).X+, +; CM-NEXT: ADD_INT * T1.X, KC0[3].X, PV.X, +; CM-NEXT: ALU clause starting at 19: +; CM-NEXT: ADD_INT * T2.X, KC0[2].W, T0.X, +; CM-NEXT: ADD_INT * T0.X, KC0[2].Z, T0.X, +; CM-NEXT: ALU clause starting at 21: +; CM-NEXT: BFE_INT T0.Y, T1.X, 0.0, literal.x, +; CM-NEXT: BFE_INT T0.Z, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212 +; CM-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x, BS:VEC_201 +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T0.X, T0.W, T0.Z, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T0.Z, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T0.Z, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T0.Z, +; CM-NEXT: ADD_INT * T0.W, PV.X, T0.Y, +; CM-NEXT: BFE_INT * T0.X, PV.W, 0.0, literal.x, +; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; +; GCN-LABEL: i8_mad_32: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s26, -1 +; GCN-NEXT: s_mov_b32 s27, 0xe8f000 +; GCN-NEXT: s_add_u32 s24, s24, s11 +; GCN-NEXT: s_addc_u32 s25, s25, 0 +; GCN-NEXT: s_load_dword s8, s[4:5], 0x11 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_add_i32 s9, s8, 4 +; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 +; GCN-NEXT: v_mov_b32_e32 v0, s8 +; GCN-NEXT: v_mov_b32_e32 v1, s9 +; GCN-NEXT: buffer_load_dword v1, v1, s[24:27], 0 offen +; GCN-NEXT: buffer_load_dword v0, v0, s[24:27], 0 offen +; GCN-NEXT: s_mov_b32 s11, 0xf000 +; GCN-NEXT: s_mov_b32 s14, 0 +; GCN-NEXT: s_mov_b32 s15, s11 +; GCN-NEXT: s_mov_b64 s[18:19], s[14:15] +; GCN-NEXT: s_mov_b64 s[22:23], s[14:15] +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b64 s[12:13], s[2:3] +; GCN-NEXT: s_mov_b64 s[16:17], s[4:5] +; GCN-NEXT: s_mov_b64 s[20:21], s[6:7] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_load_sbyte v2, v[0:1], s[12:15], 0 addr64 +; GCN-NEXT: buffer_load_sbyte v3, v[0:1], s[16:19], 0 addr64 +; GCN-NEXT: buffer_load_sbyte v0, v[0:1], s[20:23], 0 addr64 +; GCN-NEXT: s_mov_b32 s10, -1 +; GCN-NEXT: s_mov_b32 s8, s0 +; GCN-NEXT: s_mov_b32 s9, s1 +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v2 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mad_u32_u24 v0, v1, v2, v0 +; GCN-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GCN-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; GCN-NEXT: s_endpgm +; +; SI-LABEL: i8_mad_32: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 +; SI-NEXT: s_load_dword s0, s[4:5], 0x44 +; SI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 +; SI-NEXT: s_mov_b32 s90, -1 +; SI-NEXT: s_mov_b32 s91, 0xe80000 +; SI-NEXT: s_add_u32 s88, s88, s11 +; SI-NEXT: s_addc_u32 s89, s89, 0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_add_i32 s1, s0, 4 +; SI-NEXT: v_mov_b32_e32 v0, s0 +; SI-NEXT: buffer_load_dword v4, v0, s[88:91], 0 offen +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: buffer_load_dword v5, v0, s[88:91], 0 offen +; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v1, s3 +; SI-NEXT: v_mov_b32_e32 v3, s5 +; SI-NEXT: v_mov_b32_e32 v6, s7 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_add_u32_e32 v0, vcc, s2, v4 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc +; SI-NEXT: v_add_u32_e32 v2, vcc, s4, v4 +; SI-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; SI-NEXT: v_add_u32_e32 v4, vcc, s6, v4 +; SI-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc +; SI-NEXT: flat_load_sbyte v0, v[0:1] +; SI-NEXT: flat_load_sbyte v1, v[2:3] +; SI-NEXT: flat_load_sbyte v2, v[4:5] +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_mad_u16 v0, v0, v1, v2 +; SI-NEXT: v_bfe_i32 v0, v0, 0, 16 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: i8_mad_32: +; VI: ; %bb.0: ; %entry +; VI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_load_dword s0, s[4:5], 0x44 +; VI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s14, -1 +; VI-NEXT: s_mov_b32 s15, 0xe80000 +; VI-NEXT: s_add_u32 s12, s12, s11 +; VI-NEXT: s_addc_u32 s13, s13, 0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_i32 s1, s0, 4 +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: buffer_load_dword v4, v0, s[12:15], 0 offen +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: buffer_load_dword v5, v0, s[12:15], 0 offen +; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_mov_b32_e32 v3, s5 +; VI-NEXT: v_mov_b32_e32 v6, s7 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc +; VI-NEXT: v_add_u32_e32 v2, vcc, s4, v4 +; VI-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; VI-NEXT: v_add_u32_e32 v4, vcc, s6, v4 +; VI-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc +; VI-NEXT: flat_load_sbyte v0, v[0:1] +; VI-NEXT: flat_load_sbyte v1, v[2:3] +; VI-NEXT: flat_load_sbyte v2, v[4:5] +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_mad_u16 v0, v0, v1, v2 +; VI-NEXT: v_bfe_i32 v0, v0, 0, 16 +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm entry: %retval.0.i = load i64, ptr addrspace(5) %idx %arrayidx = getelementptr inbounds i8, ptr addrspace(1) %a, i64 %retval.0.i @@ -215,16 +1125,207 @@ entry: ret void } -; FUNC-LABEL: {{^}}i8_mad_64: -; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] -; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]] -; The result must be sign-extended -; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x -; EG: 8 -; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} -; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}} -; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16 define amdgpu_kernel void @i8_mad_64(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(5) %idx) { +; EG-LABEL: i8_mad_64: +; EG: ; %bb.0: ; %entry +; EG-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @8 +; EG-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @10 +; EG-NEXT: ALU 11, @21, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 8: +; EG-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1 +; EG-NEXT: Fetch clause starting at 10: +; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 +; EG-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1 +; EG-NEXT: ALU clause starting at 14: +; EG-NEXT: LSHR * T0.W, KC0[3].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: MOVA_INT * AR.x (MASKED), PV.W, +; EG-NEXT: MOV * T0.X, T(0 + AR.x).X+, +; EG-NEXT: ADD_INT * T1.X, KC0[2].W, PV.X, +; EG-NEXT: ALU clause starting at 19: +; EG-NEXT: ADD_INT T2.X, KC0[2].Z, T0.X, +; EG-NEXT: ADD_INT * T0.X, KC0[3].X, T0.X, +; EG-NEXT: ALU clause starting at 21: +; EG-NEXT: BFE_INT T0.Z, T1.X, 0.0, literal.x, +; EG-NEXT: BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212 +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: BFE_INT T1.W, T0.X, 0.0, literal.x, +; EG-NEXT: MULLO_INT * T0.X, PV.W, PV.Z, +; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, PS, PV.W, +; EG-NEXT: BFE_INT T0.X, PV.W, 0.0, literal.x, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, +; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) +; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, +; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) +; +; CM-LABEL: i8_mad_64: +; CM: ; %bb.0: ; %entry +; CM-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[] +; CM-NEXT: TEX 0 @8 +; CM-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[] +; CM-NEXT: TEX 1 @10 +; CM-NEXT: ALU 13, @21, KC0[CB0:0-32], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X +; CM-NEXT: CF_END +; CM-NEXT: PAD +; CM-NEXT: Fetch clause starting at 8: +; CM-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1 +; CM-NEXT: Fetch clause starting at 10: +; CM-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 +; CM-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1 +; CM-NEXT: ALU clause starting at 14: +; CM-NEXT: LSHR * T0.W, KC0[3].Y, literal.x, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; CM-NEXT: MOVA_INT * AR.x (MASKED), PV.W, +; CM-NEXT: MOV * T0.X, T(0 + AR.x).X+, +; CM-NEXT: ADD_INT * T1.X, KC0[3].X, PV.X, +; CM-NEXT: ALU clause starting at 19: +; CM-NEXT: ADD_INT * T2.X, KC0[2].W, T0.X, +; CM-NEXT: ADD_INT * T0.X, KC0[2].Z, T0.X, +; CM-NEXT: ALU clause starting at 21: +; CM-NEXT: BFE_INT T0.Y, T1.X, 0.0, literal.x, +; CM-NEXT: BFE_INT T0.Z, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212 +; CM-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x, BS:VEC_201 +; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T0.X, T0.W, T0.Z, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T0.Z, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T0.Z, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T0.Z, +; CM-NEXT: ADD_INT * T0.W, PV.X, T0.Y, +; CM-NEXT: BFE_INT * T0.X, PV.W, 0.0, literal.x, +; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; CM-NEXT: LSHR T1.X, KC0[2].Y, literal.x, +; CM-NEXT: ASHR * T0.Y, PV.X, literal.y, +; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) +; +; GCN-LABEL: i8_mad_64: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s26, -1 +; GCN-NEXT: s_mov_b32 s27, 0xe8f000 +; GCN-NEXT: s_add_u32 s24, s24, s11 +; GCN-NEXT: s_addc_u32 s25, s25, 0 +; GCN-NEXT: s_load_dword s8, s[4:5], 0x11 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_add_i32 s9, s8, 4 +; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 +; GCN-NEXT: v_mov_b32_e32 v0, s8 +; GCN-NEXT: v_mov_b32_e32 v1, s9 +; GCN-NEXT: buffer_load_dword v1, v1, s[24:27], 0 offen +; GCN-NEXT: buffer_load_dword v0, v0, s[24:27], 0 offen +; GCN-NEXT: s_mov_b32 s11, 0xf000 +; GCN-NEXT: s_mov_b32 s14, 0 +; GCN-NEXT: s_mov_b32 s15, s11 +; GCN-NEXT: s_mov_b64 s[18:19], s[14:15] +; GCN-NEXT: s_mov_b64 s[22:23], s[14:15] +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b64 s[12:13], s[2:3] +; GCN-NEXT: s_mov_b64 s[16:17], s[4:5] +; GCN-NEXT: s_mov_b64 s[20:21], s[6:7] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_load_sbyte v2, v[0:1], s[12:15], 0 addr64 +; GCN-NEXT: buffer_load_sbyte v3, v[0:1], s[16:19], 0 addr64 +; GCN-NEXT: buffer_load_sbyte v0, v[0:1], s[20:23], 0 addr64 +; GCN-NEXT: s_mov_b32 s10, -1 +; GCN-NEXT: s_mov_b32 s8, s0 +; GCN-NEXT: s_mov_b32 s9, s1 +; GCN-NEXT: s_waitcnt vmcnt(2) +; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v2 +; GCN-NEXT: s_waitcnt vmcnt(1) +; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v3 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mad_u32_u24 v0, v1, v2, v0 +; GCN-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; GCN-NEXT: s_endpgm +; +; SI-LABEL: i8_mad_64: +; SI: ; %bb.0: ; %entry +; SI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 +; SI-NEXT: s_load_dword s0, s[4:5], 0x44 +; SI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 +; SI-NEXT: s_mov_b32 s90, -1 +; SI-NEXT: s_mov_b32 s91, 0xe80000 +; SI-NEXT: s_add_u32 s88, s88, s11 +; SI-NEXT: s_addc_u32 s89, s89, 0 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_add_i32 s1, s0, 4 +; SI-NEXT: v_mov_b32_e32 v0, s0 +; SI-NEXT: buffer_load_dword v4, v0, s[88:91], 0 offen +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: buffer_load_dword v5, v0, s[88:91], 0 offen +; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v1, s3 +; SI-NEXT: v_mov_b32_e32 v3, s5 +; SI-NEXT: v_mov_b32_e32 v6, s7 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_add_u32_e32 v0, vcc, s2, v4 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc +; SI-NEXT: v_add_u32_e32 v2, vcc, s4, v4 +; SI-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; SI-NEXT: v_add_u32_e32 v4, vcc, s6, v4 +; SI-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc +; SI-NEXT: flat_load_sbyte v0, v[0:1] +; SI-NEXT: flat_load_sbyte v1, v[2:3] +; SI-NEXT: flat_load_sbyte v2, v[4:5] +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_mad_u16 v0, v0, v1, v2 +; SI-NEXT: v_bfe_i32 v0, v0, 0, 16 +; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: i8_mad_64: +; VI: ; %bb.0: ; %entry +; VI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_load_dword s0, s[4:5], 0x44 +; VI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s14, -1 +; VI-NEXT: s_mov_b32 s15, 0xe80000 +; VI-NEXT: s_add_u32 s12, s12, s11 +; VI-NEXT: s_addc_u32 s13, s13, 0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_i32 s1, s0, 4 +; VI-NEXT: v_mov_b32_e32 v0, s0 +; VI-NEXT: buffer_load_dword v4, v0, s[12:15], 0 offen +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: buffer_load_dword v5, v0, s[12:15], 0 offen +; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v1, s3 +; VI-NEXT: v_mov_b32_e32 v3, s5 +; VI-NEXT: v_mov_b32_e32 v6, s7 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc +; VI-NEXT: v_add_u32_e32 v2, vcc, s4, v4 +; VI-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; VI-NEXT: v_add_u32_e32 v4, vcc, s6, v4 +; VI-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc +; VI-NEXT: flat_load_sbyte v0, v[0:1] +; VI-NEXT: flat_load_sbyte v1, v[2:3] +; VI-NEXT: flat_load_sbyte v2, v[4:5] +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_mad_u16 v0, v0, v1, v2 +; VI-NEXT: v_bfe_i32 v0, v0, 0, 16 +; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; VI-NEXT: s_endpgm entry: %retval.0.i = load i64, ptr addrspace(5) %idx %arrayidx = getelementptr inbounds i8, ptr addrspace(1) %a, i64 %retval.0.i @@ -248,17 +1349,236 @@ entry: ; had a chance to form mul24. The mul combine would then see ; extractelement with no known bits and fail. All of the mul/add ; combos in this loop should form v_mad_u32_u24. - -; FUNC-LABEL: {{^}}mad24_known_bits_destroyed: -; GCN: v_mad_u32_u24 -; GCN: v_mad_u32_u24 -; GCN: v_mad_u32_u24 -; GCN: v_mad_u32_u24 -; GCN: v_mad_u32_u24 -; GCN: v_mad_u32_u24 -; GCN: v_mad_u32_u24 -; GCN: v_mad_u32_u24 define void @mad24_known_bits_destroyed(i32 %arg, <4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3, i32 %arg4, i32 %arg5, i32 %arg6, ptr addrspace(1) %arg7, ptr addrspace(1) %arg8) #0 { +; EG-LABEL: mad24_known_bits_destroyed: +; EG: ; %bb.0: ; %bb +; EG-NEXT: ALU 21, @12, KC0[CB0:0-32], KC1[] +; EG-NEXT: LOOP_START_DX10 @11 +; EG-NEXT: ALU 8, @34, KC0[], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T2.X, 0 +; EG-NEXT: ALU 14, @43, KC0[], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 0 +; EG-NEXT: ALU_PUSH_BEFORE 3, @58, KC0[], KC1[] +; EG-NEXT: JUMP @10 POP:1 +; EG-NEXT: LOOP_BREAK @10 +; EG-NEXT: POP @10 POP:1 +; EG-NEXT: END_LOOP @2 +; EG-NEXT: CF_END +; EG-NEXT: ALU clause starting at 12: +; EG-NEXT: MOV * T0.W, KC0[5].X, +; EG-NEXT: MOV * T0.Z, KC0[4].W, +; EG-NEXT: MOV * T0.Y, KC0[4].Z, +; EG-NEXT: MOV T0.X, KC0[2].Y, +; EG-NEXT: AND_INT * T1.Y, KC0[4].X, literal.x, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.Z, KC0[3].W, literal.x, +; EG-NEXT: AND_INT T1.W, KC0[3].Z, literal.x, +; EG-NEXT: MOV * T2.W, KC0[7].Y, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: LSHR T1.X, PS, literal.x, +; EG-NEXT: AND_INT T2.Y, KC0[6].Y, literal.y, +; EG-NEXT: MOV T2.Z, KC0[6].X, +; EG-NEXT: MOV * T2.W, KC0[5].W, +; EG-NEXT: 2(2.802597e-45), 16777215(2.350989e-38) +; EG-NEXT: MOV * T3.W, KC0[7].X, +; EG-NEXT: LSHR T2.X, PV.W, literal.x, +; EG-NEXT: MOV T3.Y, KC0[5].Z, +; EG-NEXT: MOV T3.Z, KC0[6].Z, +; EG-NEXT: MOV * T3.W, KC0[6].W, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; EG-NEXT: MOV * T4.W, KC0[4].Y, +; EG-NEXT: ALU clause starting at 34: +; EG-NEXT: MULLO_INT * T0.X, T0.X, T2.Y, +; EG-NEXT: ADD_INT * T4.W, PS, T3.Z, +; EG-NEXT: AND_INT * T4.W, PV.W, literal.x, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: MULLO_INT * T0.X, PV.W, T2.Y, +; EG-NEXT: MULLO_INT * T0.W, T0.W, T1.Y, +; EG-NEXT: MULLO_INT * T0.Z, T0.Z, T1.Z, +; EG-NEXT: MULLO_INT * T0.Y, T0.Y, T1.W, +; EG-NEXT: ADD_INT * T0.X, T0.X, T3.Z, +; EG-NEXT: ALU clause starting at 43: +; EG-NEXT: ADD_INT * T4.W, T0.Y, T3.Y, +; EG-NEXT: AND_INT T4.W, PV.W, literal.x, +; EG-NEXT: ADD_INT * T5.W, T0.Z, T2.W, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: AND_INT T0.Z, PS, literal.x, +; EG-NEXT: ADD_INT T0.W, T0.W, T2.Z, +; EG-NEXT: MULLO_INT * T0.Y, PV.W, T1.W, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: ADD_INT T0.Y, PS, T3.Y, +; EG-NEXT: AND_INT T0.W, PV.W, literal.x, +; EG-NEXT: MULLO_INT * T0.Z, PV.Z, T1.Z, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: ADD_INT T0.Z, PS, T2.W, +; EG-NEXT: MULLO_INT * T0.W, PV.W, T1.Y, +; EG-NEXT: ADD_INT * T0.W, PS, T2.Z, +; EG-NEXT: ALU clause starting at 58: +; EG-NEXT: ADD_INT * T3.W, T3.W, literal.x, +; EG-NEXT: -1(nan), 0(0.000000e+00) +; EG-NEXT: SETE_INT * T4.W, PV.W, 0.0, +; EG-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0, +; +; CM-LABEL: mad24_known_bits_destroyed: +; CM: ; %bb.0: ; %bb +; CM-NEXT: ALU 22, @12, KC0[CB0:0-32], KC1[] +; CM-NEXT: LOOP_START_DX10 @11 +; CM-NEXT: ALU 23, @35, KC0[], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T2.X +; CM-NEXT: ALU 23, @59, KC0[], KC1[] +; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X +; CM-NEXT: ALU_PUSH_BEFORE 3, @83, KC0[], KC1[] +; CM-NEXT: JUMP @10 POP:1 +; CM-NEXT: LOOP_BREAK @10 +; CM-NEXT: POP @10 POP:1 +; CM-NEXT: END_LOOP @2 +; CM-NEXT: CF_END +; CM-NEXT: ALU clause starting at 12: +; CM-NEXT: MOV * T0.W, KC0[5].X, +; CM-NEXT: MOV * T0.Z, KC0[4].W, +; CM-NEXT: MOV * T0.Y, KC0[4].Z, +; CM-NEXT: MOV T0.X, KC0[2].Y, +; CM-NEXT: AND_INT * T1.Y, KC0[4].X, literal.x, +; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; CM-NEXT: AND_INT T1.Z, KC0[3].W, literal.x, +; CM-NEXT: AND_INT * T1.W, KC0[3].Z, literal.x, +; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; CM-NEXT: AND_INT T2.Y, KC0[6].Y, literal.x, +; CM-NEXT: MOV T2.Z, KC0[6].X, +; CM-NEXT: MOV * T2.W, KC0[7].Y, +; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; CM-NEXT: LSHR T1.X, PV.W, literal.x, +; CM-NEXT: MOV T3.Y, KC0[5].W, +; CM-NEXT: MOV T3.Z, KC0[5].Z, +; CM-NEXT: MOV * T2.W, KC0[7].X, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; CM-NEXT: LSHR T2.X, PV.W, literal.x, +; CM-NEXT: MOV T4.Y, KC0[6].Z, +; CM-NEXT: MOV T4.Z, KC0[6].W, +; CM-NEXT: MOV * T2.W, KC0[4].Y, +; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) +; CM-NEXT: ALU clause starting at 35: +; CM-NEXT: MULLO_INT T0.X, T0.X, T2.Y, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.X, T2.Y, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.X, T2.Y, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.X, T2.Y, +; CM-NEXT: ADD_INT * T2.W, PV.X, T4.Y, +; CM-NEXT: AND_INT * T2.W, PV.W, literal.x, +; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T0.X, T2.W, T2.Y, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T2.W, T2.Y, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T2.W, T2.Y, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T2.W, T2.Y, +; CM-NEXT: MULLO_INT T0.X (MASKED), T0.W, T1.Y, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T1.Y, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T1.Y, +; CM-NEXT: MULLO_INT * T0.W, T0.W, T1.Y, +; CM-NEXT: MULLO_INT T0.X (MASKED), T0.Z, T1.Z, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.Z, T1.Z, +; CM-NEXT: MULLO_INT T0.Z, T0.Z, T1.Z, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.Z, T1.Z, +; CM-NEXT: MULLO_INT T0.X (MASKED), T0.Y, T1.W, +; CM-NEXT: MULLO_INT T0.Y, T0.Y, T1.W, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.Y, T1.W, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.Y, T1.W, +; CM-NEXT: ADD_INT * T0.X, T0.X, T4.Y, +; CM-NEXT: ALU clause starting at 59: +; CM-NEXT: ADD_INT * T2.W, T0.Y, T3.Z, +; CM-NEXT: ADD_INT T0.Z, T0.Z, T3.Y, +; CM-NEXT: AND_INT * T2.W, PV.W, literal.x, +; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T0.X (MASKED), T2.W, T1.W, +; CM-NEXT: MULLO_INT T0.Y, T2.W, T1.W, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T2.W, T1.W, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T2.W, T1.W, +; CM-NEXT: ADD_INT T0.Y, PV.Y, T3.Z, +; CM-NEXT: ADD_INT T5.Z, T0.W, T2.Z, BS:VEC_021/SCL_122 +; CM-NEXT: AND_INT * T0.W, T0.Z, literal.x, +; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T0.X (MASKED), T0.W, T1.Z, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T1.Z, +; CM-NEXT: MULLO_INT T0.Z, T0.W, T1.Z, +; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T1.Z, +; CM-NEXT: ADD_INT T0.Z, PV.Z, T3.Y, +; CM-NEXT: AND_INT * T0.W, T5.Z, literal.x, +; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; CM-NEXT: MULLO_INT T0.X (MASKED), T0.W, T1.Y, +; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T1.Y, +; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T1.Y, +; CM-NEXT: MULLO_INT * T0.W, T0.W, T1.Y, +; CM-NEXT: ADD_INT * T0.W, PV.W, T2.Z, +; CM-NEXT: ALU clause starting at 83: +; CM-NEXT: ADD_INT * T4.Z, T4.Z, literal.x, +; CM-NEXT: -1(nan), 0(0.000000e+00) +; CM-NEXT: SETE_INT * T2.W, PV.Z, 0.0, +; CM-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0, +; +; GCN-LABEL: mad24_known_bits_destroyed: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v5, v0 +; GCN-NEXT: v_and_b32_e32 v0, 0xffffff, v13 +; GCN-NEXT: v_and_b32_e32 v1, 0xffffff, v2 +; GCN-NEXT: v_and_b32_e32 v2, 0xffffff, v3 +; GCN-NEXT: v_and_b32_e32 v3, 0xffffff, v4 +; GCN-NEXT: s_mov_b64 s[8:9], 0 +; GCN-NEXT: s_mov_b32 s6, 0 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s4, s6 +; GCN-NEXT: s_mov_b32 s5, s6 +; GCN-NEXT: .LBB9_1: ; %bb19 +; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_mad_u32_u24 v4, v5, v0, v14 +; GCN-NEXT: s_waitcnt expcnt(0) +; GCN-NEXT: v_mad_u32_u24 v6, v6, v1, v10 +; GCN-NEXT: v_mad_u32_u24 v7, v7, v2, v11 +; GCN-NEXT: v_mad_u32_u24 v8, v8, v3, v12 +; GCN-NEXT: v_add_i32_e32 v15, vcc, -1, v15 +; GCN-NEXT: v_mad_u32_u24 v5, v4, v0, v14 +; GCN-NEXT: v_mad_u32_u24 v6, v6, v1, v10 +; GCN-NEXT: v_mad_u32_u24 v7, v7, v2, v11 +; GCN-NEXT: v_mad_u32_u24 v8, v8, v3, v12 +; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 +; GCN-NEXT: buffer_store_dword v5, v[16:17], s[4:7], 0 addr64 +; GCN-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-NEXT: buffer_store_dwordx4 v[5:8], v[18:19], s[4:7], 0 addr64 +; GCN-NEXT: s_andn2_b64 exec, exec, s[8:9] +; GCN-NEXT: s_cbranch_execnz .LBB9_1 +; GCN-NEXT: ; %bb.2: ; %bb18 +; GCN-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: mad24_known_bits_destroyed: +; GFX8: ; %bb.0: ; %bb +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v5, v0 +; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v13 +; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v2 +; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v3 +; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v4 +; GFX8-NEXT: s_mov_b64 s[4:5], 0 +; GFX8-NEXT: .LBB9_1: ; %bb19 +; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX8-NEXT: v_add_u32_e32 v15, vcc, -1, v15 +; GFX8-NEXT: v_mad_u32_u24 v4, v5, v0, v14 +; GFX8-NEXT: v_mad_u32_u24 v6, v6, v1, v10 +; GFX8-NEXT: v_mad_u32_u24 v7, v7, v2, v11 +; GFX8-NEXT: v_mad_u32_u24 v8, v8, v3, v12 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 +; GFX8-NEXT: v_mad_u32_u24 v5, v4, v0, v14 +; GFX8-NEXT: v_mad_u32_u24 v6, v6, v1, v10 +; GFX8-NEXT: v_mad_u32_u24 v7, v7, v2, v11 +; GFX8-NEXT: v_mad_u32_u24 v8, v8, v3, v12 +; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX8-NEXT: flat_store_dword v[16:17], v5 +; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[5:8] +; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX8-NEXT: s_cbranch_execnz .LBB9_1 +; GFX8-NEXT: ; %bb.2: ; %bb18 +; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] bb: %tmp = and i32 %arg4, 16777215 %tmp9 = extractelement <4 x i32> %arg1, i64 1 diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll index 697bcc3..5f6d622 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -206,8 +206,11 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s18, s16, 1 -; GCN-IR-NEXT: s_addc_u32 s19, s17, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[18:19], 0 +; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0 +; GCN-IR-NEXT: s_or_b32 s10, s10, s11 +; GCN-IR-NEXT: s_cmp_lg_u32 s10, 0 +; GCN-IR-NEXT: s_addc_u32 s10, s17, 0 +; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0 ; GCN-IR-NEXT: s_sub_i32 s16, 63, s16 ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] ; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[12:13], s16 @@ -217,9 +220,9 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-IR-NEXT: s_add_u32 s18, s2, -1 ; GCN-IR-NEXT: s_addc_u32 s19, s3, -1 ; GCN-IR-NEXT: s_not_b64 s[8:9], s[14:15] -; GCN-IR-NEXT: s_add_u32 s12, s8, s20 -; GCN-IR-NEXT: s_addc_u32 s13, s9, 0 -; GCN-IR-NEXT: s_mov_b64 s[14:15], 0 +; GCN-IR-NEXT: s_add_u32 s14, s8, s20 +; GCN-IR-NEXT: s_addc_u32 s15, s9, 0 +; GCN-IR-NEXT: s_mov_b64 s[12:13], 0 ; GCN-IR-NEXT: s_mov_b32 s9, 0 ; GCN-IR-NEXT: .LBB0_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -227,19 +230,22 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-IR-NEXT: s_lshr_b32 s8, s11, 31 ; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[10:11], 1 ; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[8:9] -; GCN-IR-NEXT: s_or_b64 s[10:11], s[14:15], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[10:11], s[12:13], s[10:11] ; GCN-IR-NEXT: s_sub_u32 s8, s18, s16 ; GCN-IR-NEXT: s_subb_u32 s8, s19, s17 -; GCN-IR-NEXT: s_ashr_i32 s14, s8, 31 -; GCN-IR-NEXT: s_mov_b32 s15, s14 -; GCN-IR-NEXT: s_and_b32 s8, s14, 1 -; GCN-IR-NEXT: s_and_b64 s[14:15], s[14:15], s[2:3] -; GCN-IR-NEXT: s_sub_u32 s16, s16, s14 -; GCN-IR-NEXT: s_subb_u32 s17, s17, s15 -; GCN-IR-NEXT: s_add_u32 s12, s12, 1 -; GCN-IR-NEXT: s_addc_u32 s13, s13, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[12:13], 0 -; GCN-IR-NEXT: s_mov_b64 s[14:15], s[8:9] +; GCN-IR-NEXT: s_ashr_i32 s12, s8, 31 +; GCN-IR-NEXT: s_mov_b32 s13, s12 +; GCN-IR-NEXT: s_and_b32 s8, s12, 1 +; GCN-IR-NEXT: s_and_b64 s[20:21], s[12:13], s[2:3] +; GCN-IR-NEXT: s_sub_u32 s16, s16, s20 +; GCN-IR-NEXT: s_subb_u32 s17, s17, s21 +; GCN-IR-NEXT: s_add_u32 s14, s14, 1 +; GCN-IR-NEXT: s_cselect_b64 s[20:21], -1, 0 +; GCN-IR-NEXT: s_or_b32 s20, s20, s21 +; GCN-IR-NEXT: s_cmp_lg_u32 s20, 0 +; GCN-IR-NEXT: s_addc_u32 s15, s15, 0 +; GCN-IR-NEXT: s_cselect_b64 s[20:21], -1, 0 +; GCN-IR-NEXT: s_mov_b64 s[12:13], s[8:9] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[20:21] ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-IR-NEXT: .LBB0_4: ; %Flow7 @@ -389,25 +395,25 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) { ; GCN-IR-LABEL: v_test_sdiv: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v1 -; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v12 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v13, 31, v3 -; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v12 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v0, v12 -; GCN-IR-NEXT: v_subb_u32_e32 v7, vcc, v1, v12, vcc -; GCN-IR-NEXT: v_xor_b32_e32 v0, v2, v13 -; GCN-IR-NEXT: v_xor_b32_e32 v1, v3, v13 -; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v13 -; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v13, vcc +; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v1 +; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v10 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v11, 31, v3 +; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v10 +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v0, v10 +; GCN-IR-NEXT: v_subb_u32_e32 v7, vcc, v1, v10, vcc +; GCN-IR-NEXT: v_xor_b32_e32 v0, v2, v11 +; GCN-IR-NEXT: v_xor_b32_e32 v1, v3, v11 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v11, vcc ; GCN-IR-NEXT: v_ffbh_u32_e32 v2, v0 ; GCN-IR-NEXT: v_add_i32_e64 v2, s[6:7], 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 -; GCN-IR-NEXT: v_min_u32_e32 v10, v2, v3 +; GCN-IR-NEXT: v_min_u32_e32 v8, v2, v3 ; GCN-IR-NEXT: v_ffbh_u32_e32 v2, v6 ; GCN-IR-NEXT: v_add_i32_e64 v2, s[6:7], 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v7 -; GCN-IR-NEXT: v_min_u32_e32 v11, v2, v3 -; GCN-IR-NEXT: v_sub_i32_e64 v2, s[6:7], v10, v11 +; GCN-IR-NEXT: v_min_u32_e32 v9, v2, v3 +; GCN-IR-NEXT: v_sub_i32_e64 v2, s[6:7], v8, v9 ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[6:7] ; GCN-IR-NEXT: v_subb_u32_e64 v3, s[6:7], 0, 0, s[6:7] @@ -416,70 +422,69 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) { ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[2:3] ; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 -; GCN-IR-NEXT: v_mov_b32_e32 v14, v12 -; GCN-IR-NEXT: v_mov_b32_e32 v15, v13 +; GCN-IR-NEXT: v_mov_b32_e32 v12, v10 +; GCN-IR-NEXT: v_mov_b32_e32 v13, v11 ; GCN-IR-NEXT: v_cndmask_b32_e64 v5, v7, 0, s[4:5] ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v6, 0, s[4:5] ; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB1_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 1, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v3, vcc +; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, 1, v2 +; GCN-IR-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v2 -; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[6:7], v2 +; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB1_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_add_i32_e32 v16, vcc, -1, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v17, vcc, -1, v1, vcc -; GCN-IR-NEXT: v_not_b32_e32 v4, v10 -; GCN-IR-NEXT: v_lshr_b64 v[8:9], v[6:7], v8 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, v4, v11 -; GCN-IR-NEXT: v_mov_b32_e32 v10, 0 -; GCN-IR-NEXT: v_addc_u32_e64 v7, s[4:5], -1, 0, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 +; GCN-IR-NEXT: v_lshr_b64 v[6:7], v[6:7], v14 +; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, -1, v0 +; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, -1, v1, vcc +; GCN-IR-NEXT: v_not_b32_e32 v4, v8 +; GCN-IR-NEXT: v_add_i32_e32 v16, vcc, v4, v9 +; GCN-IR-NEXT: v_addc_u32_e64 v17, s[8:9], -1, 0, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: .LBB1_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 +; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v8, v8, v4 +; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v16, v8 -; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v17, v9, vcc -; GCN-IR-NEXT: v_or_b32_e32 v2, v10, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v4 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v6 -; GCN-IR-NEXT: v_or_b32_e32 v3, v11, v3 -; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v10 -; GCN-IR-NEXT: v_and_b32_e32 v11, v10, v1 -; GCN-IR-NEXT: v_and_b32_e32 v10, v10, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] -; GCN-IR-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v10 -; GCN-IR-NEXT: v_subb_u32_e64 v9, s[4:5], v9, v11, s[4:5] -; GCN-IR-NEXT: v_mov_b32_e32 v11, v5 -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v10, v4 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v14, v6 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v15, v7, vcc +; GCN-IR-NEXT: v_or_b32_e32 v2, v8, v2 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v8, 31, v4 +; GCN-IR-NEXT: v_or_b32_e32 v3, v9, v3 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v8 +; GCN-IR-NEXT: v_and_b32_e32 v9, v8, v1 +; GCN-IR-NEXT: v_and_b32_e32 v8, v8, v0 +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v6, v8 +; GCN-IR-NEXT: v_subb_u32_e32 v7, vcc, v7, v9, vcc +; GCN-IR-NEXT: v_add_i32_e32 v16, vcc, 1, v16 +; GCN-IR-NEXT: v_addc_u32_e32 v17, vcc, 0, v17, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v9, v5 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v8, v4 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB1_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB1_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB1_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[2:3], 1 ; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v1 ; GCN-IR-NEXT: v_or_b32_e32 v4, v4, v0 ; GCN-IR-NEXT: .LBB1_6: ; %Flow5 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7] -; GCN-IR-NEXT: v_xor_b32_e32 v0, v13, v12 -; GCN-IR-NEXT: v_xor_b32_e32 v1, v15, v14 +; GCN-IR-NEXT: v_xor_b32_e32 v0, v11, v10 +; GCN-IR-NEXT: v_xor_b32_e32 v1, v13, v12 ; GCN-IR-NEXT: v_xor_b32_e32 v3, v4, v0 ; GCN-IR-NEXT: v_xor_b32_e32 v2, v5, v1 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v3, v0 @@ -1293,34 +1298,37 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5] ; GCN-IR-NEXT: s_sub_u32 s2, s2, s4 ; GCN-IR-NEXT: s_subb_u32 s3, s3, s4 -; GCN-IR-NEXT: s_flbit_i32_b64 s14, s[2:3] -; GCN-IR-NEXT: s_add_u32 s10, s14, 0xffffffc5 +; GCN-IR-NEXT: s_flbit_i32_b64 s16, s[2:3] +; GCN-IR-NEXT: s_add_u32 s10, s16, 0xffffffc5 ; GCN-IR-NEXT: s_addc_u32 s11, 0, -1 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[12:13], s[10:11], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[16:17], s[10:11], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[10:11], 63 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[8:9], s[12:13] ; GCN-IR-NEXT: s_and_b64 s[8:9], s[12:13], exec ; GCN-IR-NEXT: s_cselect_b32 s8, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[16:17] +; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[12:13] ; GCN-IR-NEXT: s_mov_b32 s9, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s12, s10, 1 -; GCN-IR-NEXT: s_addc_u32 s13, s11, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[12:13], 0 +; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GCN-IR-NEXT: s_or_b32 s8, s8, s9 +; GCN-IR-NEXT: s_cmp_lg_u32 s8, 0 +; GCN-IR-NEXT: s_addc_u32 s8, s11, 0 +; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0 ; GCN-IR-NEXT: s_sub_i32 s10, 63, s10 ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[8:9] ; GCN-IR-NEXT: s_lshl_b64 s[8:9], 24, s10 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: s_lshr_b64 s[12:13], 24, s12 -; GCN-IR-NEXT: s_add_u32 s16, s2, -1 -; GCN-IR-NEXT: s_addc_u32 s17, s3, -1 -; GCN-IR-NEXT: s_sub_u32 s10, 58, s14 -; GCN-IR-NEXT: s_subb_u32 s11, 0, 0 -; GCN-IR-NEXT: s_mov_b64 s[14:15], 0 +; GCN-IR-NEXT: s_add_u32 s14, s2, -1 +; GCN-IR-NEXT: s_addc_u32 s15, s3, -1 +; GCN-IR-NEXT: s_sub_u32 s16, 58, s16 +; GCN-IR-NEXT: s_subb_u32 s17, 0, 0 +; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 ; GCN-IR-NEXT: s_mov_b32 s7, 0 ; GCN-IR-NEXT: .LBB10_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -1328,19 +1336,22 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_lshr_b32 s6, s9, 31 ; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[8:9], 1 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[6:7] -; GCN-IR-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9] -; GCN-IR-NEXT: s_sub_u32 s6, s16, s12 -; GCN-IR-NEXT: s_subb_u32 s6, s17, s13 -; GCN-IR-NEXT: s_ashr_i32 s14, s6, 31 -; GCN-IR-NEXT: s_mov_b32 s15, s14 -; GCN-IR-NEXT: s_and_b32 s6, s14, 1 -; GCN-IR-NEXT: s_and_b64 s[14:15], s[14:15], s[2:3] -; GCN-IR-NEXT: s_sub_u32 s12, s12, s14 -; GCN-IR-NEXT: s_subb_u32 s13, s13, s15 -; GCN-IR-NEXT: s_add_u32 s10, s10, 1 -; GCN-IR-NEXT: s_addc_u32 s11, s11, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[10:11], 0 -; GCN-IR-NEXT: s_mov_b64 s[14:15], s[6:7] +; GCN-IR-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] +; GCN-IR-NEXT: s_sub_u32 s6, s14, s12 +; GCN-IR-NEXT: s_subb_u32 s6, s15, s13 +; GCN-IR-NEXT: s_ashr_i32 s10, s6, 31 +; GCN-IR-NEXT: s_mov_b32 s11, s10 +; GCN-IR-NEXT: s_and_b32 s6, s10, 1 +; GCN-IR-NEXT: s_and_b64 s[18:19], s[10:11], s[2:3] +; GCN-IR-NEXT: s_sub_u32 s12, s12, s18 +; GCN-IR-NEXT: s_subb_u32 s13, s13, s19 +; GCN-IR-NEXT: s_add_u32 s16, s16, 1 +; GCN-IR-NEXT: s_cselect_b64 s[18:19], -1, 0 +; GCN-IR-NEXT: s_or_b32 s18, s18, s19 +; GCN-IR-NEXT: s_cmp_lg_u32 s18, 0 +; GCN-IR-NEXT: s_addc_u32 s17, s17, 0 +; GCN-IR-NEXT: s_cselect_b64 s[18:19], -1, 0 +; GCN-IR-NEXT: s_mov_b64 s[10:11], s[6:7] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19] ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_3 ; GCN-IR-NEXT: .LBB10_4: ; %Flow6 @@ -1472,17 +1483,17 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) { ; GCN-IR-LABEL: v_test_sdiv_k_num_i64: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v1 -; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v12 -; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v12 -; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 -; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v12, vcc +; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v1 +; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v10 +; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v10 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v10, vcc ; GCN-IR-NEXT: v_ffbh_u32_e32 v2, v0 ; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 -; GCN-IR-NEXT: v_min_u32_e32 v10, v2, v3 +; GCN-IR-NEXT: v_min_u32_e32 v8, v2, v3 ; GCN-IR-NEXT: s_movk_i32 s6, 0xffc5 -; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, s6, v10 +; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, s6, v8 ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] @@ -1490,69 +1501,68 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, 24, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: v_mov_b32_e32 v13, v12 +; GCN-IR-NEXT: v_mov_b32_e32 v11, v10 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB11_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc +; GCN-IR-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v2 -; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], 24, v2 +; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB11_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, -1, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, -1, v1, vcc -; GCN-IR-NEXT: v_lshr_b64 v[8:9], 24, v6 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, 58, v10 -; GCN-IR-NEXT: v_mov_b32_e32 v10, 0 -; GCN-IR-NEXT: v_subb_u32_e64 v7, s[4:5], 0, 0, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 +; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, -1, v0 +; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, -1, v1, vcc +; GCN-IR-NEXT: v_sub_i32_e32 v14, vcc, 58, v8 +; GCN-IR-NEXT: v_lshr_b64 v[6:7], 24, v6 +; GCN-IR-NEXT: v_subb_u32_e64 v15, s[8:9], 0, 0, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: .LBB11_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 +; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v8, v8, v4 +; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v14, v8 -; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v15, v9, vcc -; GCN-IR-NEXT: v_or_b32_e32 v2, v10, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v4 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v6 -; GCN-IR-NEXT: v_or_b32_e32 v3, v11, v3 -; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v10 -; GCN-IR-NEXT: v_and_b32_e32 v11, v10, v1 -; GCN-IR-NEXT: v_and_b32_e32 v10, v10, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] -; GCN-IR-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v10 -; GCN-IR-NEXT: v_subb_u32_e64 v9, s[4:5], v9, v11, s[4:5] -; GCN-IR-NEXT: v_mov_b32_e32 v11, v5 -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v10, v4 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v12, v6 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v13, v7, vcc +; GCN-IR-NEXT: v_or_b32_e32 v2, v8, v2 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v8, 31, v4 +; GCN-IR-NEXT: v_or_b32_e32 v3, v9, v3 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v8 +; GCN-IR-NEXT: v_and_b32_e32 v9, v8, v1 +; GCN-IR-NEXT: v_and_b32_e32 v8, v8, v0 +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v6, v8 +; GCN-IR-NEXT: v_subb_u32_e32 v7, vcc, v7, v9, vcc +; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, 1, v14 +; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, 0, v15, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v9, v5 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v8, v4 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB11_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB11_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB11_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[2:3], 1 ; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v1 ; GCN-IR-NEXT: v_or_b32_e32 v4, v4, v0 ; GCN-IR-NEXT: .LBB11_6: ; %Flow5 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7] -; GCN-IR-NEXT: v_xor_b32_e32 v0, v4, v12 -; GCN-IR-NEXT: v_xor_b32_e32 v1, v5, v13 -; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 -; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v13, vcc +; GCN-IR-NEXT: v_xor_b32_e32 v0, v4, v10 +; GCN-IR-NEXT: v_xor_b32_e32 v1, v5, v11 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v11, vcc ; GCN-IR-NEXT: s_setpc_b64 s[30:31] %result = sdiv i64 24, %x ret i64 %result @@ -1665,17 +1675,17 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-LABEL: v_test_sdiv_pow2_k_num_i64: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v1 -; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v12 -; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v12 -; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 -; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v12, vcc +; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v1 +; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v10 +; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v10 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v10, vcc ; GCN-IR-NEXT: v_ffbh_u32_e32 v2, v0 ; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 -; GCN-IR-NEXT: v_min_u32_e32 v10, v2, v3 +; GCN-IR-NEXT: v_min_u32_e32 v8, v2, v3 ; GCN-IR-NEXT: s_movk_i32 s6, 0xffd0 -; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, s6, v10 +; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, s6, v8 ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] @@ -1684,70 +1694,69 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v4, 0, s[4:5] ; GCN-IR-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GCN-IR-NEXT: v_mov_b32_e32 v13, v12 +; GCN-IR-NEXT: v_mov_b32_e32 v11, v10 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2 +; GCN-IR-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc -; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0x8000 +; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[8:9], v2 ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] -; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[4:5], v2 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[8:9] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[10:11], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[10:11] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, -1, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, -1, v1, vcc -; GCN-IR-NEXT: v_lshr_b64 v[8:9], s[4:5], v6 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, 47, v10 -; GCN-IR-NEXT: v_mov_b32_e32 v10, 0 -; GCN-IR-NEXT: v_subb_u32_e64 v7, s[4:5], 0, 0, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 +; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, -1, v0 +; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, -1, v1, vcc +; GCN-IR-NEXT: v_sub_i32_e32 v14, vcc, 47, v8 +; GCN-IR-NEXT: v_lshr_b64 v[6:7], s[8:9], v6 +; GCN-IR-NEXT: v_subb_u32_e64 v15, s[8:9], 0, 0, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: .LBB12_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 +; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v8, v8, v4 +; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v14, v8 -; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v15, v9, vcc -; GCN-IR-NEXT: v_or_b32_e32 v2, v10, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v4 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v6 -; GCN-IR-NEXT: v_or_b32_e32 v3, v11, v3 -; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v10 -; GCN-IR-NEXT: v_and_b32_e32 v11, v10, v1 -; GCN-IR-NEXT: v_and_b32_e32 v10, v10, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] -; GCN-IR-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v10 -; GCN-IR-NEXT: v_subb_u32_e64 v9, s[4:5], v9, v11, s[4:5] -; GCN-IR-NEXT: v_mov_b32_e32 v11, v5 -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v10, v4 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v12, v6 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v13, v7, vcc +; GCN-IR-NEXT: v_or_b32_e32 v2, v8, v2 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v8, 31, v4 +; GCN-IR-NEXT: v_or_b32_e32 v3, v9, v3 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v8 +; GCN-IR-NEXT: v_and_b32_e32 v9, v8, v1 +; GCN-IR-NEXT: v_and_b32_e32 v8, v8, v0 +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v6, v8 +; GCN-IR-NEXT: v_subb_u32_e32 v7, vcc, v7, v9, vcc +; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, 1, v14 +; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, 0, v15, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v9, v5 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v8, v4 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB12_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB12_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB12_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[2:3], 1 ; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v1 ; GCN-IR-NEXT: v_or_b32_e32 v4, v4, v0 ; GCN-IR-NEXT: .LBB12_6: ; %Flow5 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7] -; GCN-IR-NEXT: v_xor_b32_e32 v0, v4, v12 -; GCN-IR-NEXT: v_xor_b32_e32 v1, v5, v13 -; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 -; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v13, vcc +; GCN-IR-NEXT: v_xor_b32_e32 v0, v4, v10 +; GCN-IR-NEXT: v_xor_b32_e32 v1, v5, v11 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v11, vcc ; GCN-IR-NEXT: s_setpc_b64 s[30:31] %result = sdiv i64 32768, %x ret i64 %result @@ -1767,20 +1776,20 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) { ; GCN-IR-LABEL: v_test_sdiv_pow2_k_den_i64: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v1 -; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v10 -; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v10 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v0, v10 -; GCN-IR-NEXT: v_subb_u32_e32 v5, vcc, v1, v10, vcc +; GCN-IR-NEXT: v_ashrrev_i32_e32 v8, 31, v1 +; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v8 +; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v8 +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v0, v8 +; GCN-IR-NEXT: v_subb_u32_e32 v5, vcc, v1, v8, vcc ; GCN-IR-NEXT: v_ffbh_u32_e32 v0, v4 ; GCN-IR-NEXT: v_add_i32_e64 v0, s[4:5], 32, v0 ; GCN-IR-NEXT: v_ffbh_u32_e32 v1, v5 -; GCN-IR-NEXT: v_min_u32_e32 v8, v0, v1 -; GCN-IR-NEXT: v_sub_i32_e64 v0, s[4:5], 48, v8 +; GCN-IR-NEXT: v_min_u32_e32 v6, v0, v1 +; GCN-IR-NEXT: v_sub_i32_e64 v0, s[4:5], 48, v6 ; GCN-IR-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5] ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] ; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[0:1] -; GCN-IR-NEXT: v_mov_b32_e32 v11, v10 +; GCN-IR-NEXT: v_mov_b32_e32 v9, v8 ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[0:1] ; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 @@ -1790,61 +1799,60 @@ define i64 @v_test_sdiv_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB13_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc +; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v0 +; GCN-IR-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v0, s[4:5], 63, v0 -; GCN-IR-NEXT: v_mov_b32_e32 v2, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] ; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[4:5], v0 +; GCN-IR-NEXT: v_mov_b32_e32 v2, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v3, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB13_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_lshr_b64 v[6:7], v[4:5], v6 -; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, 0xffffffcf, v8 -; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 -; GCN-IR-NEXT: v_addc_u32_e64 v5, s[4:5], 0, -1, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 +; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, 0xffffffcf, v6 +; GCN-IR-NEXT: v_lshr_b64 v[4:5], v[4:5], v7 +; GCN-IR-NEXT: v_addc_u32_e64 v11, s[8:9], 0, -1, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v3, 0 -; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff +; GCN-IR-NEXT: s_movk_i32 s10, 0x7fff ; GCN-IR-NEXT: .LBB13_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 +; GCN-IR-NEXT: v_lshl_b64 v[4:5], v[4:5], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v2, 31, v1 -; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v2 -; GCN-IR-NEXT: v_sub_i32_e32 v2, vcc, s12, v6 +; GCN-IR-NEXT: v_or_b32_e32 v4, v4, v2 ; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 -; GCN-IR-NEXT: v_subb_u32_e32 v2, vcc, 0, v7, vcc -; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, 1, v4 -; GCN-IR-NEXT: v_or_b32_e32 v0, v8, v0 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v8, 31, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; GCN-IR-NEXT: v_and_b32_e32 v2, 1, v8 -; GCN-IR-NEXT: v_and_b32_e32 v8, 0x8000, v8 -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] -; GCN-IR-NEXT: v_or_b32_e32 v1, v9, v1 -; GCN-IR-NEXT: v_sub_i32_e64 v6, s[4:5], v6, v8 -; GCN-IR-NEXT: v_mov_b32_e32 v9, v3 -; GCN-IR-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v7, s[4:5] -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v8, v2 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v2, vcc, s10, v4 +; GCN-IR-NEXT: v_subb_u32_e32 v2, vcc, 0, v5, vcc +; GCN-IR-NEXT: v_or_b32_e32 v0, v6, v0 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v6, 31, v2 +; GCN-IR-NEXT: v_and_b32_e32 v2, 1, v6 +; GCN-IR-NEXT: v_and_b32_e32 v6, 0x8000, v6 +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 +; GCN-IR-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc +; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, 1, v10 +; GCN-IR-NEXT: v_or_b32_e32 v1, v7, v1 +; GCN-IR-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v7, v3 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v6, v2 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB13_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB13_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB13_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 ; GCN-IR-NEXT: v_or_b32_e32 v3, v3, v1 ; GCN-IR-NEXT: v_or_b32_e32 v2, v2, v0 ; GCN-IR-NEXT: .LBB13_6: ; %Flow5 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[6:7] -; GCN-IR-NEXT: v_xor_b32_e32 v0, v2, v10 -; GCN-IR-NEXT: v_xor_b32_e32 v1, v3, v11 -; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 -; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v11, vcc +; GCN-IR-NEXT: v_xor_b32_e32 v0, v2, v8 +; GCN-IR-NEXT: v_xor_b32_e32 v1, v3, v9 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc ; GCN-IR-NEXT: s_setpc_b64 s[30:31] %result = sdiv i64 %x, 32768 ret i64 %result diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll index bb22144..9814ed8 100644 --- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll @@ -1,15 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SAFE %s -; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-SAFE %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9,GFX9-SAFE %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-TRUE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-FAKE16 %s - -; RUN: llc -mtriple=amdgcn -mcpu=hawaii -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=CI,CI-NSZ %s -; RUN: llc -mtriple=amdgcn -mcpu=fiji -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=VI,VI-NSZ %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX9,GFX9-NSZ %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-TRUE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-FAKE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI %s +; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) { ; CI-LABEL: add_select_fabs_fabs_v2f16: @@ -63,69 +57,37 @@ define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fabs_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fabs_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y) @@ -198,73 +160,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fabs_fabs_v2f16(<2 x ; GFX9-NEXT: v_pk_add_f16 v1, v1, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y) @@ -328,73 +256,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fabs_fabs_v2f1 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-SAFE-TRUE16-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NSZ-TRUE16-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v2 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, v2 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y) @@ -469,73 +363,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fabs_fabs_v2f16(<2 x ; GFX9-NEXT: v_pk_add_f16 v1, v2, v5 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y) @@ -597,63 +457,34 @@ define <2 x half> @add_select_fabs_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_var_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_var_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_var_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_var_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fabs_var_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fabs_var_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %select = select <2 x i1> %cmp, <2 x half> %fabs.x, <2 x half> %y @@ -709,61 +540,33 @@ define <2 x half> @add_select_fabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fabs_negk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fabs_negk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> <half -1.0, half -1.0> @@ -815,61 +618,33 @@ define <2 x half> @add_select_fabs_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x) ; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0> %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %select) @@ -920,61 +695,33 @@ define <2 x half> @add_select_posk_posk_v2f16(<2 x i32> %c, <2 x half> %x) { ; GFX9-NEXT: v_pk_add_f16 v0, v0, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_posk_posk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_posk_posk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_posk_posk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_posk_posk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_posk_posk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_posk_posk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %select = select <2 x i1> %cmp, <2 x half> <half 2.0, half 2.0>, <2 x half> <half 1.0, half 1.0> %add = fadd <2 x half> %select, %x @@ -1029,61 +776,33 @@ define <2 x half> @add_select_negk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_negk_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_negk_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %select = select <2 x i1> %cmp, <2 x half> <half -1.0, half -1.0>, <2 x half> %fabs @@ -1140,61 +859,33 @@ define <2 x half> @add_select_negliteralk_fabs_v2f16(<2 x i32> %c, <2 x half> %x ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %select = select <2 x i1> %cmp, <2 x half> <half -1024.0, half -1024.0>, <2 x half> %fabs @@ -1250,61 +941,33 @@ define <2 x half> @add_select_fabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_posk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_posk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_posk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_posk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fabs_posk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fabs_posk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> <half 1.0, half 1.0> @@ -1360,61 +1023,33 @@ define <2 x half> @add_select_posk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_posk_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_posk_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %select = select <2 x i1> %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %fabs @@ -1470,57 +1105,31 @@ define <2 x half> @add_select_fneg_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_fneg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_fneg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_fneg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_fneg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fneg_fneg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fneg_fneg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %fneg.y = fneg <2 x half> %y @@ -1587,61 +1196,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fneg_fneg_v2f16(<2 x ; GFX9-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %fneg.y = fneg <2 x half> %y @@ -1705,61 +1286,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fneg_fneg_v2f1 ; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %fneg.y = fneg <2 x half> %y @@ -1828,61 +1381,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fneg_fneg_v2f16(<2 x ; GFX9-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %fneg.y = fneg <2 x half> %y @@ -1948,63 +1473,34 @@ define <2 x half> @add_select_fneg_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_var_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_var_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_var_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_var_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fneg_var_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fneg_var_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> %y @@ -2058,55 +1554,30 @@ define <2 x half> @add_select_fneg_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fneg_negk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fneg_negk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half -1.0, half -1.0> @@ -2161,55 +1632,30 @@ define <2 x half> @add_select_fneg_inv2pi_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 0xH3118, half 0xH3118> @@ -2264,55 +1710,30 @@ define <2 x half> @add_select_fneg_neginv2pi_v2f16(<2 x i32> %c, <2 x half> %x, ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 0xHB118, half 0xHB118> @@ -2363,61 +1784,33 @@ define <2 x half> @add_select_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x) { ; GFX9-NEXT: v_pk_add_f16 v0, v0, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_negk_negk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_negk_negk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_negk_negk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_negk_negk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_negk_negk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_negk_negk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0> %add = fadd <2 x half> %select, %x @@ -2469,61 +1862,33 @@ define <2 x half> @add_select_negliteralk_negliteralk_v2f16(<2 x i32> %c, <2 x h ; GFX9-NEXT: v_pk_add_f16 v0, v0, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %select = select <2 x i1> %cmp, <2 x half> <half -2048.0, half -2048.0>, <2 x half> <half -4096.0, half -4096.0> %add = fadd <2 x half> %select, %x @@ -2573,61 +1938,33 @@ define <2 x half> @add_select_fneg_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x) ; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0> %fneg.x = fneg <2 x half> %select @@ -2681,55 +2018,30 @@ define <2 x half> @add_select_negk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fneg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fneg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fneg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fneg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_negk_fneg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_negk_fneg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %select = select <2 x i1> %cmp, <2 x half> <half -1.0, half -1.0>, <2 x half> %fneg.x @@ -2783,55 +2095,30 @@ define <2 x half> @add_select_fneg_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_posk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_posk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_posk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_posk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fneg_posk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fneg_posk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 1.0, half 1.0> @@ -2885,55 +2172,30 @@ define <2 x half> @add_select_posk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x ; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fneg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fneg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fneg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fneg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_posk_fneg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_posk_fneg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %select = select <2 x i1> %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %fneg.x @@ -2997,69 +2259,37 @@ define <2 x half> @add_select_negfabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_negfabs_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_negfabs_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fneg.fabs.x = fneg <2 x half> %fabs.x @@ -3125,69 +2355,37 @@ define <2 x half> @add_select_fabs_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negfabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negfabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negfabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negfabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fabs_negfabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fabs_negfabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y) @@ -3253,69 +2451,37 @@ define <2 x half> @add_select_neg_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_neg_fabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_neg_fabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_neg_fabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_neg_fabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_neg_fabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_neg_fabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y) @@ -3380,69 +2546,37 @@ define <2 x half> @add_select_fabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h ; GFX9-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_neg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_neg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_neg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_neg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_fabs_neg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_fabs_neg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fneg.y = fneg <2 x half> %y @@ -3501,63 +2635,34 @@ define <2 x half> @add_select_neg_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_neg_negfabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_neg_negfabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_neg_negfabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_neg_negfabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_neg_negfabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_neg_negfabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fneg.x = fneg <2 x half> %x %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y) @@ -3617,63 +2722,34 @@ define <2 x half> @add_select_negfabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_neg_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_neg_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_neg_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_neg_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: add_select_negfabs_neg_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: add_select_negfabs_neg_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1] +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fneg.fabs.x = fneg <2 x half> %fabs.x @@ -3735,61 +2811,33 @@ define <2 x half> @mul_select_negfabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_posk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_posk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_posk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_posk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: mul_select_negfabs_posk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: mul_select_negfabs_posk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fneg.fabs.x = fneg <2 x half> %fabs.x @@ -3850,61 +2898,33 @@ define <2 x half> @mul_select_posk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: mul_select_posk_negfabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: mul_select_posk_negfabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: mul_select_posk_negfabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: mul_select_posk_negfabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: mul_select_posk_negfabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: mul_select_posk_negfabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fneg.fabs.x = fneg <2 x half> %fabs.x @@ -3965,61 +2985,33 @@ define <2 x half> @mul_select_negfabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_negk_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_negk_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_negk_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_negk_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: mul_select_negfabs_negk_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: mul_select_negfabs_negk_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fneg.fabs.x = fneg <2 x half> %fabs.x @@ -4080,61 +3072,33 @@ define <2 x half> @mul_select_negk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 ; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: mul_select_negk_negfabs_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: mul_select_negk_negfabs_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: mul_select_negk_negfabs_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: mul_select_negk_negfabs_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: mul_select_negk_negfabs_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: mul_select_negk_negfabs_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) %fneg.fabs.x = fneg <2 x half> %fabs.x @@ -4171,115 +3135,63 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, < ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc ; CI-NEXT: s_setpc_b64 s[30:31] ; -; VI-SAFE-LABEL: select_fneg_posk_src_add_v2f16: -; VI-SAFE: ; %bb.0: -; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x4400 -; VI-SAFE-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-SAFE-NEXT: v_add_f16_e32 v2, 4.0, v2 -; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1 -; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-SAFE-LABEL: select_fneg_posk_src_add_v2f16: -; GFX9-SAFE: ; %bb.0: -; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-SAFE-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] -; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; VI-NSZ-LABEL: select_fneg_posk_src_add_v2f16: -; VI-NSZ: ; %bb.0: -; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0xc400 -; VI-NSZ-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-NSZ-NEXT: v_sub_f16_e32 v2, -4.0, v2 -; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000 -; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5] -; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-NSZ-LABEL: select_fneg_posk_src_add_v2f16: -; GFX9-NSZ: ; %bb.0: -; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-NSZ-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; VI-LABEL: select_fneg_posk_src_add_v2f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; VI-NEXT: v_mov_b32_e32 v1, 0x4400 +; VI-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_add_f16_e32 v2, 4.0, v2 +; VI-NEXT: v_or_b32_e32 v1, v2, v1 +; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; VI-NEXT: v_mov_b32_e32 v2, 0x4000 +; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: select_fneg_posk_src_add_v2f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] +; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-LABEL: select_fneg_posk_src_add_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_add_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %add = fadd <2 x half> %x, <half 4.0, half 4.0> %fneg = fneg <2 x half> %add @@ -4330,55 +3242,30 @@ define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> % ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %add = fadd nsz <2 x half> %x, <half 4.0, half 4.0> %fneg = fneg <2 x half> %add @@ -4387,153 +3274,86 @@ define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> % } define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) { -; CI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16: -; CI-SAFE: ; %bb.0: -; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-SAFE-NEXT: v_add_f32_e32 v3, -4.0, v3 -; CI-SAFE-NEXT: v_add_f32_e32 v2, -4.0, v2 -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3 -; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2 -; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2 -; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc -; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc -; CI-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; VI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16: -; VI-SAFE: ; %bb.0: -; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0xc400 -; VI-SAFE-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-SAFE-NEXT: v_add_f16_e32 v2, -4.0, v2 -; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1 -; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-SAFE-LABEL: select_fneg_posk_src_sub_v2f16: -; GFX9-SAFE: ; %bb.0: -; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-SAFE-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] -; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; CI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16: -; CI-NSZ: ; %bb.0: -; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NSZ-NEXT: v_sub_f32_e32 v2, 4.0, v2 -; CI-NSZ-NEXT: v_sub_f32_e32 v3, 4.0, v3 -; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc -; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc -; CI-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; VI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16: -; VI-NSZ: ; %bb.0: -; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x4400 -; VI-NSZ-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-NSZ-NEXT: v_sub_f16_e32 v2, 4.0, v2 -; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000 -; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5] -; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-NSZ-LABEL: select_fneg_posk_src_sub_v2f16: -; GFX9-NSZ: ; %bb.0: -; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-NSZ-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; CI-LABEL: select_fneg_posk_src_sub_v2f16: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_add_f32_e32 v3, -4.0, v3 +; CI-NEXT: v_add_f32_e32 v2, -4.0, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; CI-NEXT: v_or_b32_e32 v2, v2, v3 +; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v2 +; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc +; CI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: select_fneg_posk_src_sub_v2f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; VI-NEXT: v_mov_b32_e32 v1, 0xc400 +; VI-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_add_f16_e32 v2, -4.0, v2 +; VI-NEXT: v_or_b32_e32 v1, v2, v1 +; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; VI-NEXT: v_mov_b32_e32 v2, 0x4000 +; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: select_fneg_posk_src_sub_v2f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] +; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %add = fsub <2 x half> %x, <half 4.0, half 4.0> %fneg = fneg <2 x half> %add @@ -4541,6 +3361,80 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) { ret <2 x half> %select } +define <2 x half> @select_fneg_posk_src_sub_v2f16_nsz(<2 x i32> %c, <2 x half> %x) { +; CI-LABEL: select_fneg_posk_src_sub_v2f16_nsz: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; CI-NEXT: v_sub_f32_e32 v2, 4.0, v2 +; CI-NEXT: v_sub_f32_e32 v3, 4.0, v3 +; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc +; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc +; CI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: select_fneg_posk_src_sub_v2f16_nsz: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; VI-NEXT: v_mov_b32_e32 v1, 0x4400 +; VI-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_sub_f16_e32 v2, 4.0, v2 +; VI-NEXT: v_mov_b32_e32 v3, 0x4000 +; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5] +; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: select_fneg_posk_src_sub_v2f16_nsz: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] + %cmp = icmp eq <2 x i32> %c, zeroinitializer + %add = fsub <2 x half> %x, <half 4.0, half 4.0> + %fneg = fneg nsz <2 x half> %add + %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0> + ret <2 x half> %select +} + define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) { ; CI-LABEL: select_fneg_posk_src_mul_v2f16: ; CI: ; %bb.0: @@ -4584,55 +3478,30 @@ define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) { ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %mul = fmul <2 x half> %x, <half 4.0, half 4.0> %fneg = fneg <2 x half> %mul @@ -4668,118 +3537,65 @@ define <2 x half> @select_fneg_posk_src_fma_v2f16(<2 x i32> %c, <2 x half> %x, < ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc ; CI-NEXT: s_setpc_b64 s[30:31] ; -; VI-SAFE-LABEL: select_fneg_posk_src_fma_v2f16: -; VI-SAFE: ; %bb.0: -; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-SAFE-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; VI-SAFE-NEXT: v_fma_f16 v1, v4, 4.0, v1 -; VI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; VI-SAFE-NEXT: v_fma_f16 v2, v2, 4.0, v3 -; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1 -; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-SAFE-LABEL: select_fneg_posk_src_fma_v2f16: -; GFX9-SAFE: ; %bb.0: -; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-SAFE-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1] -; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; VI-NSZ-LABEL: select_fneg_posk_src_fma_v2f16: -; VI-NSZ: ; %bb.0: -; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-NSZ-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; VI-NSZ-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; VI-NSZ-NEXT: v_fma_f16 v1, v4, -4.0, -v1 -; VI-NSZ-NEXT: v_fma_f16 v2, v2, -4.0, -v3 -; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000 -; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5] -; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-NSZ-LABEL: select_fneg_posk_src_fma_v2f16: -; GFX9-NSZ: ; %bb.0: -; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-NSZ-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; VI-LABEL: select_fneg_posk_src_fma_v2f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; VI-NEXT: v_fma_f16 v1, v4, 4.0, v1 +; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; VI-NEXT: v_fma_f16 v2, v2, 4.0, v3 +; VI-NEXT: v_or_b32_e32 v1, v2, v1 +; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; VI-NEXT: v_mov_b32_e32 v2, 0x4000 +; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: select_fneg_posk_src_fma_v2f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1] +; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z) %fneg = fneg <2 x half> %fma @@ -4817,118 +3633,65 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x, ; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc ; CI-NEXT: s_setpc_b64 s[30:31] ; -; VI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16: -; VI-SAFE: ; %bb.0: -; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-SAFE-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; VI-SAFE-NEXT: v_fma_f16 v1, v4, 4.0, v1 -; VI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; VI-SAFE-NEXT: v_fma_f16 v2, v2, 4.0, v3 -; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1 -; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16: -; GFX9-SAFE: ; %bb.0: -; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-SAFE-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1] -; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 -; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; VI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16: -; VI-NSZ: ; %bb.0: -; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; VI-NSZ-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; VI-NSZ-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; VI-NSZ-NEXT: v_fma_f16 v1, v4, -4.0, -v1 -; VI-NSZ-NEXT: v_fma_f16 v2, v2, -4.0, -v3 -; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000 -; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5] -; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16: -; GFX9-NSZ: ; %bb.0: -; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; GFX9-NSZ-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000 -; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 -; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] -; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100 -; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4 -; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; VI-LABEL: select_fneg_posk_src_fmad_v2f16: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; VI-NEXT: v_fma_f16 v1, v4, 4.0, v1 +; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; VI-NEXT: v_fma_f16 v2, v2, 4.0, v3 +; VI-NEXT: v_or_b32_e32 v1, v2, v1 +; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; VI-NEXT: v_mov_b32_e32 v2, 0x4000 +; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; VI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: select_fneg_posk_src_fmad_v2f16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; GFX9-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1] +; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5] +; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_mov_b32 s4, 0x5040100 +; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fmad = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z) %fneg = fneg <2 x half> %fmad @@ -4986,55 +3749,30 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16_nsz(<2 x i32> %c, <2 x half> ; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: -; GFX11-SAFE-TRUE16: ; %bb.0: -; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: -; GFX11-SAFE-FAKE16: ; %bb.0: -; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: -; GFX11-NSZ-TRUE16: ; %bb.0: -; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 -; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo -; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 -; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: -; GFX11-NSZ-FAKE16: ; %bb.0: -; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 -; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo -; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 -; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq <2 x i32> %c, zeroinitializer %fmad = call nsz <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z) %fneg = fneg <2 x half> %fmad @@ -5049,5 +3787,3 @@ declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GFX11: {{.*}} -; GFX11-NSZ: {{.*}} -; GFX11-SAFE: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll index 465024a..33b0a5d 100644 --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -170,35 +170,38 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[6:7], 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 0 ; GCN-IR-NEXT: s_flbit_i32_b64 s10, s[6:7] -; GCN-IR-NEXT: s_flbit_i32_b64 s18, s[2:3] +; GCN-IR-NEXT: s_flbit_i32_b64 s16, s[2:3] ; GCN-IR-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] -; GCN-IR-NEXT: s_sub_u32 s12, s10, s18 +; GCN-IR-NEXT: s_sub_u32 s12, s10, s16 ; GCN-IR-NEXT: s_subb_u32 s13, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[14:15], s[12:13], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[16:17], s[12:13], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[12:13], 63 ; GCN-IR-NEXT: s_or_b64 s[14:15], s[8:9], s[14:15] ; GCN-IR-NEXT: s_and_b64 s[8:9], s[14:15], exec ; GCN-IR-NEXT: s_cselect_b32 s9, 0, s3 ; GCN-IR-NEXT: s_cselect_b32 s8, 0, s2 -; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] +; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[14:15] ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s14, s12, 1 -; GCN-IR-NEXT: s_addc_u32 s15, s13, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[14:15], 0 +; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GCN-IR-NEXT: s_or_b32 s8, s8, s9 +; GCN-IR-NEXT: s_cmp_lg_u32 s8, 0 +; GCN-IR-NEXT: s_addc_u32 s8, s13, 0 +; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0 ; GCN-IR-NEXT: s_sub_i32 s12, 63, s12 ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[8:9] ; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[2:3], s12 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: s_lshr_b64 s[12:13], s[2:3], s14 -; GCN-IR-NEXT: s_add_u32 s16, s6, -1 -; GCN-IR-NEXT: s_addc_u32 s17, s7, -1 +; GCN-IR-NEXT: s_add_u32 s14, s6, -1 +; GCN-IR-NEXT: s_addc_u32 s15, s7, -1 ; GCN-IR-NEXT: s_not_b64 s[4:5], s[10:11] -; GCN-IR-NEXT: s_add_u32 s10, s4, s18 -; GCN-IR-NEXT: s_addc_u32 s11, s5, 0 -; GCN-IR-NEXT: s_mov_b64 s[14:15], 0 +; GCN-IR-NEXT: s_add_u32 s16, s4, s16 +; GCN-IR-NEXT: s_addc_u32 s17, s5, 0 +; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 ; GCN-IR-NEXT: s_mov_b32 s5, 0 ; GCN-IR-NEXT: .LBB0_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -206,19 +209,22 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-IR-NEXT: s_lshr_b32 s4, s9, 31 ; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[8:9], 1 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[4:5] -; GCN-IR-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9] -; GCN-IR-NEXT: s_sub_u32 s4, s16, s12 -; GCN-IR-NEXT: s_subb_u32 s4, s17, s13 -; GCN-IR-NEXT: s_ashr_i32 s14, s4, 31 -; GCN-IR-NEXT: s_mov_b32 s15, s14 -; GCN-IR-NEXT: s_and_b32 s4, s14, 1 -; GCN-IR-NEXT: s_and_b64 s[14:15], s[14:15], s[6:7] -; GCN-IR-NEXT: s_sub_u32 s12, s12, s14 -; GCN-IR-NEXT: s_subb_u32 s13, s13, s15 -; GCN-IR-NEXT: s_add_u32 s10, s10, 1 -; GCN-IR-NEXT: s_addc_u32 s11, s11, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[10:11], 0 -; GCN-IR-NEXT: s_mov_b64 s[14:15], s[4:5] +; GCN-IR-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] +; GCN-IR-NEXT: s_sub_u32 s4, s14, s12 +; GCN-IR-NEXT: s_subb_u32 s4, s15, s13 +; GCN-IR-NEXT: s_ashr_i32 s10, s4, 31 +; GCN-IR-NEXT: s_mov_b32 s11, s10 +; GCN-IR-NEXT: s_and_b32 s4, s10, 1 +; GCN-IR-NEXT: s_and_b64 s[18:19], s[10:11], s[6:7] +; GCN-IR-NEXT: s_sub_u32 s12, s12, s18 +; GCN-IR-NEXT: s_subb_u32 s13, s13, s19 +; GCN-IR-NEXT: s_add_u32 s16, s16, 1 +; GCN-IR-NEXT: s_cselect_b64 s[18:19], -1, 0 +; GCN-IR-NEXT: s_or_b32 s18, s18, s19 +; GCN-IR-NEXT: s_cmp_lg_u32 s18, 0 +; GCN-IR-NEXT: s_addc_u32 s17, s17, 0 +; GCN-IR-NEXT: s_cselect_b64 s[18:19], -1, 0 +; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19] ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-IR-NEXT: .LBB0_4: ; %Flow7 @@ -373,12 +379,12 @@ define i64 @v_test_srem(i64 %x, i64 %y) { ; GCN-IR-LABEL: v_test_srem: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-IR-NEXT: v_ashrrev_i32_e32 v14, 31, v1 -; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v14 -; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v14 -; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v14 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v1 +; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v12 +; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v12 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v4, 31, v3 -; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v14, vcc +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v12, vcc ; GCN-IR-NEXT: v_xor_b32_e32 v2, v2, v4 ; GCN-IR-NEXT: v_xor_b32_e32 v3, v3, v4 ; GCN-IR-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 @@ -386,12 +392,12 @@ define i64 @v_test_srem(i64 %x, i64 %y) { ; GCN-IR-NEXT: v_ffbh_u32_e32 v4, v2 ; GCN-IR-NEXT: v_add_i32_e64 v4, s[6:7], 32, v4 ; GCN-IR-NEXT: v_ffbh_u32_e32 v5, v3 -; GCN-IR-NEXT: v_min_u32_e32 v12, v4, v5 +; GCN-IR-NEXT: v_min_u32_e32 v10, v4, v5 ; GCN-IR-NEXT: v_ffbh_u32_e32 v4, v0 ; GCN-IR-NEXT: v_add_i32_e64 v4, s[6:7], 32, v4 ; GCN-IR-NEXT: v_ffbh_u32_e32 v5, v1 -; GCN-IR-NEXT: v_min_u32_e32 v13, v4, v5 -; GCN-IR-NEXT: v_sub_i32_e64 v4, s[6:7], v12, v13 +; GCN-IR-NEXT: v_min_u32_e32 v11, v4, v5 +; GCN-IR-NEXT: v_sub_i32_e64 v4, s[6:7], v10, v11 ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_subb_u32_e64 v5, s[6:7], 0, 0, s[6:7] @@ -400,7 +406,7 @@ define i64 @v_test_srem(i64 %x, i64 %y) { ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[4:5] ; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 -; GCN-IR-NEXT: v_mov_b32_e32 v15, v14 +; GCN-IR-NEXT: v_mov_b32_e32 v13, v12 ; GCN-IR-NEXT: v_cndmask_b32_e64 v7, v1, 0, s[4:5] ; GCN-IR-NEXT: v_cndmask_b32_e64 v6, v0, 0, s[4:5] ; GCN-IR-NEXT: s_and_b64 s[4:5], s[6:7], vcc @@ -408,54 +414,53 @@ define i64 @v_test_srem(i64 %x, i64 %y) { ; GCN-IR-NEXT: s_cbranch_execz .LBB1_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 1, v4 -; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc +; GCN-IR-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v4, s[4:5], 63, v4 -; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] ; GCN-IR-NEXT: v_lshl_b64 v[4:5], v[0:1], v4 +; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB1_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_add_i32_e32 v16, vcc, -1, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v17, vcc, -1, v3, vcc -; GCN-IR-NEXT: v_not_b32_e32 v6, v12 -; GCN-IR-NEXT: v_lshr_b64 v[10:11], v[0:1], v8 -; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, v6, v13 -; GCN-IR-NEXT: v_mov_b32_e32 v12, 0 -; GCN-IR-NEXT: v_addc_u32_e64 v9, s[4:5], -1, 0, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v13, 0 +; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, -1, v2 +; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, -1, v3, vcc +; GCN-IR-NEXT: v_not_b32_e32 v6, v10 +; GCN-IR-NEXT: v_add_i32_e32 v16, vcc, v6, v11 +; GCN-IR-NEXT: v_lshr_b64 v[8:9], v[0:1], v8 +; GCN-IR-NEXT: v_addc_u32_e64 v17, s[8:9], -1, 0, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v10, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 ; GCN-IR-NEXT: .LBB1_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 +; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v6, 31, v5 -; GCN-IR-NEXT: v_or_b32_e32 v10, v10, v6 +; GCN-IR-NEXT: v_or_b32_e32 v8, v8, v6 ; GCN-IR-NEXT: v_lshl_b64 v[4:5], v[4:5], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v16, v10 -; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, v17, v11, vcc -; GCN-IR-NEXT: v_or_b32_e32 v4, v12, v4 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v6 -; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 1, v8 -; GCN-IR-NEXT: v_or_b32_e32 v5, v13, v5 -; GCN-IR-NEXT: v_and_b32_e32 v6, 1, v12 -; GCN-IR-NEXT: v_and_b32_e32 v13, v12, v3 -; GCN-IR-NEXT: v_and_b32_e32 v12, v12, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] -; GCN-IR-NEXT: v_sub_i32_e64 v10, s[4:5], v10, v12 -; GCN-IR-NEXT: v_subb_u32_e64 v11, s[4:5], v11, v13, s[4:5] -; GCN-IR-NEXT: v_mov_b32_e32 v13, v7 -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v12, v6 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v14, v8 +; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, v15, v9, vcc +; GCN-IR-NEXT: v_or_b32_e32 v4, v10, v4 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v6 +; GCN-IR-NEXT: v_or_b32_e32 v5, v11, v5 +; GCN-IR-NEXT: v_and_b32_e32 v6, 1, v10 +; GCN-IR-NEXT: v_and_b32_e32 v11, v10, v3 +; GCN-IR-NEXT: v_and_b32_e32 v10, v10, v2 +; GCN-IR-NEXT: v_sub_i32_e32 v8, vcc, v8, v10 +; GCN-IR-NEXT: v_subb_u32_e32 v9, vcc, v9, v11, vcc +; GCN-IR-NEXT: v_add_i32_e32 v16, vcc, 1, v16 +; GCN-IR-NEXT: v_addc_u32_e32 v17, vcc, 0, v17, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v11, v7 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v10, v6 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB1_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB1_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB1_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[4:5], v[4:5], 1 ; GCN-IR-NEXT: v_or_b32_e32 v7, v7, v5 ; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v4 @@ -469,10 +474,10 @@ define i64 @v_test_srem(i64 %x, i64 %y) { ; GCN-IR-NEXT: v_add_i32_e32 v3, vcc, v4, v3 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc -; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v14 -; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v15 -; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v14 -; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v15, vcc +; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v12 +; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v13 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v13, vcc ; GCN-IR-NEXT: s_setpc_b64 s[30:31] %result = srem i64 %x, %y ret i64 %result @@ -1148,35 +1153,38 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 % ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[2:3], s[8:9], 0 ; GCN-IR-NEXT: s_flbit_i32_b64 s12, s[8:9] ; GCN-IR-NEXT: s_or_b64 s[10:11], s[2:3], s[10:11] -; GCN-IR-NEXT: s_flbit_i32_b64 s20, s[6:7] -; GCN-IR-NEXT: s_sub_u32 s14, s12, s20 +; GCN-IR-NEXT: s_flbit_i32_b64 s18, s[6:7] +; GCN-IR-NEXT: s_sub_u32 s14, s12, s18 ; GCN-IR-NEXT: s_subb_u32 s15, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[16:17], s[14:15], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[14:15], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[14:15], 63 ; GCN-IR-NEXT: s_or_b64 s[16:17], s[10:11], s[16:17] ; GCN-IR-NEXT: s_and_b64 s[10:11], s[16:17], exec ; GCN-IR-NEXT: s_cselect_b32 s11, 0, s7 ; GCN-IR-NEXT: s_cselect_b32 s10, 0, s6 -; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] +; GCN-IR-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21] ; GCN-IR-NEXT: s_mov_b64 s[2:3], 0 ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[16:17] ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s16, s14, 1 -; GCN-IR-NEXT: s_addc_u32 s17, s15, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[16:17], 0 +; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0 +; GCN-IR-NEXT: s_or_b32 s10, s10, s11 +; GCN-IR-NEXT: s_cmp_lg_u32 s10, 0 +; GCN-IR-NEXT: s_addc_u32 s10, s15, 0 +; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0 ; GCN-IR-NEXT: s_sub_i32 s14, 63, s14 ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] ; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[6:7], s14 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: s_lshr_b64 s[14:15], s[6:7], s16 -; GCN-IR-NEXT: s_add_u32 s18, s8, -1 -; GCN-IR-NEXT: s_addc_u32 s19, s9, -1 +; GCN-IR-NEXT: s_add_u32 s16, s8, -1 +; GCN-IR-NEXT: s_addc_u32 s17, s9, -1 ; GCN-IR-NEXT: s_not_b64 s[2:3], s[12:13] -; GCN-IR-NEXT: s_add_u32 s12, s2, s20 -; GCN-IR-NEXT: s_addc_u32 s13, s3, 0 -; GCN-IR-NEXT: s_mov_b64 s[16:17], 0 +; GCN-IR-NEXT: s_add_u32 s18, s2, s18 +; GCN-IR-NEXT: s_addc_u32 s19, s3, 0 +; GCN-IR-NEXT: s_mov_b64 s[12:13], 0 ; GCN-IR-NEXT: s_mov_b32 s3, 0 ; GCN-IR-NEXT: .LBB8_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -1184,19 +1192,22 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 % ; GCN-IR-NEXT: s_lshr_b32 s2, s11, 31 ; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[10:11], 1 ; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[2:3] -; GCN-IR-NEXT: s_or_b64 s[10:11], s[16:17], s[10:11] -; GCN-IR-NEXT: s_sub_u32 s2, s18, s14 -; GCN-IR-NEXT: s_subb_u32 s2, s19, s15 -; GCN-IR-NEXT: s_ashr_i32 s16, s2, 31 -; GCN-IR-NEXT: s_mov_b32 s17, s16 -; GCN-IR-NEXT: s_and_b32 s2, s16, 1 -; GCN-IR-NEXT: s_and_b64 s[16:17], s[16:17], s[8:9] -; GCN-IR-NEXT: s_sub_u32 s14, s14, s16 -; GCN-IR-NEXT: s_subb_u32 s15, s15, s17 -; GCN-IR-NEXT: s_add_u32 s12, s12, 1 -; GCN-IR-NEXT: s_addc_u32 s13, s13, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[20:21], s[12:13], 0 -; GCN-IR-NEXT: s_mov_b64 s[16:17], s[2:3] +; GCN-IR-NEXT: s_or_b64 s[10:11], s[12:13], s[10:11] +; GCN-IR-NEXT: s_sub_u32 s2, s16, s14 +; GCN-IR-NEXT: s_subb_u32 s2, s17, s15 +; GCN-IR-NEXT: s_ashr_i32 s12, s2, 31 +; GCN-IR-NEXT: s_mov_b32 s13, s12 +; GCN-IR-NEXT: s_and_b32 s2, s12, 1 +; GCN-IR-NEXT: s_and_b64 s[20:21], s[12:13], s[8:9] +; GCN-IR-NEXT: s_sub_u32 s14, s14, s20 +; GCN-IR-NEXT: s_subb_u32 s15, s15, s21 +; GCN-IR-NEXT: s_add_u32 s18, s18, 1 +; GCN-IR-NEXT: s_cselect_b64 s[20:21], -1, 0 +; GCN-IR-NEXT: s_or_b32 s20, s20, s21 +; GCN-IR-NEXT: s_cmp_lg_u32 s20, 0 +; GCN-IR-NEXT: s_addc_u32 s19, s19, 0 +; GCN-IR-NEXT: s_cselect_b64 s[20:21], -1, 0 +; GCN-IR-NEXT: s_mov_b64 s[12:13], s[2:3] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[20:21] ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_3 ; GCN-IR-NEXT: .LBB8_4: ; %Flow7 @@ -1461,34 +1472,37 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_xor_b64 s[2:3], s[2:3], s[8:9] ; GCN-IR-NEXT: s_sub_u32 s4, s2, s8 ; GCN-IR-NEXT: s_subb_u32 s5, s3, s8 -; GCN-IR-NEXT: s_flbit_i32_b64 s12, s[4:5] -; GCN-IR-NEXT: s_add_u32 s2, s12, 0xffffffc5 +; GCN-IR-NEXT: s_flbit_i32_b64 s14, s[4:5] +; GCN-IR-NEXT: s_add_u32 s2, s14, 0xffffffc5 ; GCN-IR-NEXT: s_addc_u32 s3, 0, -1 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[4:5], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[2:3], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[2:3], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 63 ; GCN-IR-NEXT: s_or_b64 s[10:11], s[8:9], s[10:11] ; GCN-IR-NEXT: s_and_b64 s[8:9], s[10:11], exec ; GCN-IR-NEXT: s_cselect_b32 s8, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15] +; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] ; GCN-IR-NEXT: s_mov_b32 s9, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s8, s2, 1 -; GCN-IR-NEXT: s_addc_u32 s9, s3, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[8:9], 0 +; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0 +; GCN-IR-NEXT: s_or_b32 s9, s10, s11 +; GCN-IR-NEXT: s_cmp_lg_u32 s9, 0 +; GCN-IR-NEXT: s_addc_u32 s3, s3, 0 +; GCN-IR-NEXT: s_cselect_b64 s[10:11], -1, 0 ; GCN-IR-NEXT: s_sub_i32 s2, 63, s2 ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] ; GCN-IR-NEXT: s_lshl_b64 s[2:3], 24, s2 ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: s_lshr_b64 s[10:11], 24, s8 -; GCN-IR-NEXT: s_add_u32 s14, s4, -1 -; GCN-IR-NEXT: s_addc_u32 s15, s5, -1 -; GCN-IR-NEXT: s_sub_u32 s8, 58, s12 -; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 -; GCN-IR-NEXT: s_mov_b64 s[12:13], 0 +; GCN-IR-NEXT: s_add_u32 s12, s4, -1 +; GCN-IR-NEXT: s_addc_u32 s13, s5, -1 +; GCN-IR-NEXT: s_sub_u32 s14, 58, s14 +; GCN-IR-NEXT: s_subb_u32 s15, 0, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 ; GCN-IR-NEXT: s_mov_b32 s7, 0 ; GCN-IR-NEXT: .LBB10_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -1496,19 +1510,22 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_lshr_b32 s6, s3, 31 ; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 ; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[6:7] -; GCN-IR-NEXT: s_or_b64 s[2:3], s[12:13], s[2:3] -; GCN-IR-NEXT: s_sub_u32 s6, s14, s10 -; GCN-IR-NEXT: s_subb_u32 s6, s15, s11 -; GCN-IR-NEXT: s_ashr_i32 s12, s6, 31 -; GCN-IR-NEXT: s_mov_b32 s13, s12 -; GCN-IR-NEXT: s_and_b32 s6, s12, 1 -; GCN-IR-NEXT: s_and_b64 s[12:13], s[12:13], s[4:5] -; GCN-IR-NEXT: s_sub_u32 s10, s10, s12 -; GCN-IR-NEXT: s_subb_u32 s11, s11, s13 -; GCN-IR-NEXT: s_add_u32 s8, s8, 1 -; GCN-IR-NEXT: s_addc_u32 s9, s9, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[16:17], s[8:9], 0 -; GCN-IR-NEXT: s_mov_b64 s[12:13], s[6:7] +; GCN-IR-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3] +; GCN-IR-NEXT: s_sub_u32 s6, s12, s10 +; GCN-IR-NEXT: s_subb_u32 s6, s13, s11 +; GCN-IR-NEXT: s_ashr_i32 s8, s6, 31 +; GCN-IR-NEXT: s_mov_b32 s9, s8 +; GCN-IR-NEXT: s_and_b32 s6, s8, 1 +; GCN-IR-NEXT: s_and_b64 s[16:17], s[8:9], s[4:5] +; GCN-IR-NEXT: s_sub_u32 s10, s10, s16 +; GCN-IR-NEXT: s_subb_u32 s11, s11, s17 +; GCN-IR-NEXT: s_add_u32 s14, s14, 1 +; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0 +; GCN-IR-NEXT: s_or_b32 s16, s16, s17 +; GCN-IR-NEXT: s_cmp_lg_u32 s16, 0 +; GCN-IR-NEXT: s_addc_u32 s15, s15, 0 +; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], s[6:7] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] ; GCN-IR-NEXT: s_cbranch_vccz .LBB10_3 ; GCN-IR-NEXT: .LBB10_4: ; %Flow6 @@ -1647,9 +1664,9 @@ define i64 @v_test_srem_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_ffbh_u32_e32 v2, v0 ; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 -; GCN-IR-NEXT: v_min_u32_e32 v10, v2, v3 +; GCN-IR-NEXT: v_min_u32_e32 v8, v2, v3 ; GCN-IR-NEXT: s_movk_i32 s6, 0xffc5 -; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, s6, v10 +; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, s6, v8 ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] @@ -1663,53 +1680,52 @@ define i64 @v_test_srem_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_cbranch_execz .LBB11_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc +; GCN-IR-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v2 -; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], 24, v2 +; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB11_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, -1, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, -1, v1, vcc -; GCN-IR-NEXT: v_lshr_b64 v[8:9], 24, v6 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, 58, v10 -; GCN-IR-NEXT: v_mov_b32_e32 v10, 0 -; GCN-IR-NEXT: v_subb_u32_e64 v7, s[4:5], 0, 0, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 +; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, -1, v0 +; GCN-IR-NEXT: v_addc_u32_e32 v11, vcc, -1, v1, vcc +; GCN-IR-NEXT: v_sub_i32_e32 v12, vcc, 58, v8 +; GCN-IR-NEXT: v_lshr_b64 v[6:7], 24, v6 +; GCN-IR-NEXT: v_subb_u32_e64 v13, s[8:9], 0, 0, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: .LBB11_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 +; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v8, v8, v4 +; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v12, v8 -; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v13, v9, vcc -; GCN-IR-NEXT: v_or_b32_e32 v2, v10, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v4 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v6 -; GCN-IR-NEXT: v_or_b32_e32 v3, v11, v3 -; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v10 -; GCN-IR-NEXT: v_and_b32_e32 v11, v10, v1 -; GCN-IR-NEXT: v_and_b32_e32 v10, v10, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] -; GCN-IR-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v10 -; GCN-IR-NEXT: v_subb_u32_e64 v9, s[4:5], v9, v11, s[4:5] -; GCN-IR-NEXT: v_mov_b32_e32 v11, v5 -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v10, v4 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v10, v6 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v11, v7, vcc +; GCN-IR-NEXT: v_or_b32_e32 v2, v8, v2 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v8, 31, v4 +; GCN-IR-NEXT: v_or_b32_e32 v3, v9, v3 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v8 +; GCN-IR-NEXT: v_and_b32_e32 v9, v8, v1 +; GCN-IR-NEXT: v_and_b32_e32 v8, v8, v0 +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v6, v8 +; GCN-IR-NEXT: v_subb_u32_e32 v7, vcc, v7, v9, vcc +; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, 1, v12 +; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, 0, v13, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v9, v5 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v8, v4 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB11_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB11_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB11_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 ; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v3 ; GCN-IR-NEXT: v_or_b32_e32 v4, v4, v2 @@ -1838,9 +1854,9 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_ffbh_u32_e32 v2, v0 ; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 -; GCN-IR-NEXT: v_min_u32_e32 v10, v2, v3 +; GCN-IR-NEXT: v_min_u32_e32 v8, v2, v3 ; GCN-IR-NEXT: s_movk_i32 s6, 0xffd0 -; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, s6, v10 +; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, s6, v8 ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] @@ -1855,54 +1871,53 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_cbranch_execz .LBB12_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2 +; GCN-IR-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc -; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0x8000 +; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[8:9], v2 ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] -; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[4:5], v2 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[8:9] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[10:11], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[10:11] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, -1, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, -1, v1, vcc -; GCN-IR-NEXT: v_lshr_b64 v[8:9], s[4:5], v6 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, 47, v10 -; GCN-IR-NEXT: v_mov_b32_e32 v10, 0 -; GCN-IR-NEXT: v_subb_u32_e64 v7, s[4:5], 0, 0, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 +; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, -1, v0 +; GCN-IR-NEXT: v_addc_u32_e32 v11, vcc, -1, v1, vcc +; GCN-IR-NEXT: v_sub_i32_e32 v12, vcc, 47, v8 +; GCN-IR-NEXT: v_lshr_b64 v[6:7], s[8:9], v6 +; GCN-IR-NEXT: v_subb_u32_e64 v13, s[8:9], 0, 0, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: .LBB12_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 +; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v8, v8, v4 +; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v12, v8 -; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v13, v9, vcc -; GCN-IR-NEXT: v_or_b32_e32 v2, v10, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v4 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v6 -; GCN-IR-NEXT: v_or_b32_e32 v3, v11, v3 -; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v10 -; GCN-IR-NEXT: v_and_b32_e32 v11, v10, v1 -; GCN-IR-NEXT: v_and_b32_e32 v10, v10, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] -; GCN-IR-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v10 -; GCN-IR-NEXT: v_subb_u32_e64 v9, s[4:5], v9, v11, s[4:5] -; GCN-IR-NEXT: v_mov_b32_e32 v11, v5 -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v10, v4 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v10, v6 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v11, v7, vcc +; GCN-IR-NEXT: v_or_b32_e32 v2, v8, v2 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v8, 31, v4 +; GCN-IR-NEXT: v_or_b32_e32 v3, v9, v3 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v8 +; GCN-IR-NEXT: v_and_b32_e32 v9, v8, v1 +; GCN-IR-NEXT: v_and_b32_e32 v8, v8, v0 +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v6, v8 +; GCN-IR-NEXT: v_subb_u32_e32 v7, vcc, v7, v9, vcc +; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, 1, v12 +; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, 0, v13, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v9, v5 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v8, v4 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB12_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB12_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB12_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 ; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v3 ; GCN-IR-NEXT: v_or_b32_e32 v4, v4, v2 @@ -1937,20 +1952,20 @@ define i64 @v_test_srem_pow2_k_den_i64(i64 %x) { ; GCN-IR-LABEL: v_test_srem_pow2_k_den_i64: ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v1 -; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v12 -; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v12 -; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 -; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v12, vcc +; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v1 +; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v10 +; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v10 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v10, vcc ; GCN-IR-NEXT: v_ffbh_u32_e32 v2, v0 ; GCN-IR-NEXT: v_add_i32_e64 v2, s[4:5], 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 -; GCN-IR-NEXT: v_min_u32_e32 v10, v2, v3 -; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 48, v10 +; GCN-IR-NEXT: v_min_u32_e32 v8, v2, v3 +; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 48, v8 ; GCN-IR-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5] ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[2:3] -; GCN-IR-NEXT: v_mov_b32_e32 v13, v12 +; GCN-IR-NEXT: v_mov_b32_e32 v11, v10 ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[2:3] ; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 @@ -1961,51 +1976,50 @@ define i64 @v_test_srem_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: s_cbranch_execz .LBB13_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc +; GCN-IR-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v2 -; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[0:1], v2 +; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB13_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_lshr_b64 v[8:9], v[0:1], v6 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 0xffffffcf, v10 -; GCN-IR-NEXT: v_mov_b32_e32 v10, 0 -; GCN-IR-NEXT: v_addc_u32_e64 v7, s[4:5], 0, -1, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 +; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, 0xffffffcf, v8 +; GCN-IR-NEXT: v_lshr_b64 v[6:7], v[0:1], v6 +; GCN-IR-NEXT: v_addc_u32_e64 v13, s[8:9], 0, -1, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff +; GCN-IR-NEXT: s_movk_i32 s10, 0x7fff ; GCN-IR-NEXT: .LBB13_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 +; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v8, v8, v4 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, s12, v8 +; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, 0, v9, vcc -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v6 -; GCN-IR-NEXT: v_or_b32_e32 v2, v10, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v4 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc -; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v10 -; GCN-IR-NEXT: v_and_b32_e32 v10, 0x8000, v10 -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] -; GCN-IR-NEXT: v_or_b32_e32 v3, v11, v3 -; GCN-IR-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v10 -; GCN-IR-NEXT: v_mov_b32_e32 v11, v5 -; GCN-IR-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v9, s[4:5] -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v10, v4 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, s10, v6 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, 0, v7, vcc +; GCN-IR-NEXT: v_or_b32_e32 v2, v8, v2 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v8, 31, v4 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v8 +; GCN-IR-NEXT: v_and_b32_e32 v8, 0x8000, v8 +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v6, v8 +; GCN-IR-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v7, vcc +; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, 1, v12 +; GCN-IR-NEXT: v_or_b32_e32 v3, v9, v3 +; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, 0, v13, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v9, v5 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v8, v4 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB13_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB13_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB13_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 ; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v3 ; GCN-IR-NEXT: v_or_b32_e32 v4, v4, v2 @@ -2014,10 +2028,10 @@ define i64 @v_test_srem_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[4:5], 15 ; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 ; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc -; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v12 -; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v13 -; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 -; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v13, vcc +; GCN-IR-NEXT: v_xor_b32_e32 v0, v0, v10 +; GCN-IR-NEXT: v_xor_b32_e32 v1, v1, v11 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v11, vcc ; GCN-IR-NEXT: s_setpc_b64 s[30:31] %result = srem i64 %x, 32768 ret i64 %result diff --git a/llvm/test/CodeGen/AMDGPU/uaddo.ll b/llvm/test/CodeGen/AMDGPU/uaddo.ll index e1574dc..bb5918b2 100644 --- a/llvm/test/CodeGen/AMDGPU/uaddo.ll +++ b/llvm/test/CodeGen/AMDGPU/uaddo.ll @@ -14,15 +14,16 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_add_u32 s0, s2, s8 -; SI-NEXT: v_mov_b32_e32 v0, s2 +; SI-NEXT: s_add_u32 s2, s2, s8 ; SI-NEXT: s_mov_b32 s5, s1 -; SI-NEXT: s_addc_u32 s1, s3, s9 +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: s_or_b32 s0, s0, s1 +; SI-NEXT: s_cmp_lg_u32 s0, 0 +; SI-NEXT: s_addc_u32 s3, s3, s9 +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; SI-NEXT: v_mov_b32_e32 v1, s3 -; SI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1] -; SI-NEXT: v_mov_b32_e32 v1, s1 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; SI-NEXT: v_add_i32_e32 v0, vcc, s0, v0 +; SI-NEXT: v_add_i32_e32 v0, vcc, s2, v0 ; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm @@ -33,15 +34,15 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: s_add_u32 s0, s2, s4 -; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: s_add_u32 s2, s2, s4 ; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; VI-NEXT: s_cmp_lg_u64 s[0:1], 0 +; VI-NEXT: s_addc_u32 s3, s3, s5 +; VI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_addc_u32 s1, s3, s5 -; VI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[2:3] -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 +; VI-NEXT: v_add_u32_e32 v2, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; VI-NEXT: s_endpgm @@ -52,14 +53,14 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: s_add_u32 s4, s2, s6 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_addc_u32 s5, s3, s7 -; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[4:5], v[0:1] -; GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0 +; GFX9-NEXT: s_add_u32 s6, s2, s6 +; GFX9-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GFX9-NEXT: s_cmp_lg_u64 s[4:5], 0 +; GFX9-NEXT: s_addc_u32 s4, s3, s7 +; GFX9-NEXT: s_cselect_b64 s[2:3], -1, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3] +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm @@ -71,12 +72,14 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_add_u32 s4, s2, s6 -; GFX10-NEXT: s_addc_u32 s5, s3, s7 -; GFX10-NEXT: v_cmp_lt_u64_e64 s2, s[4:5], s[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 -; GFX10-NEXT: v_add_co_u32 v0, s2, s4, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s2, s5, 0, s2 +; GFX10-NEXT: s_add_u32 s2, s2, s6 +; GFX10-NEXT: s_cselect_b32 s4, -1, 0 +; GFX10-NEXT: s_cmp_lg_u32 s4, 0 +; GFX10-NEXT: s_addc_u32 s3, s3, s7 +; GFX10-NEXT: s_cselect_b32 s4, -1, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v0, s2, s2, v0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s2, s3, 0, s2 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -87,14 +90,16 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_add_u32 s4, s2, s4 -; GFX11-NEXT: s_addc_u32 s5, s3, s5 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cmp_lt_u64_e64 s2, s[4:5], s[2:3] -; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX11-NEXT: s_add_u32 s2, s2, s4 +; GFX11-NEXT: s_cselect_b32 s4, -1, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11-NEXT: s_addc_u32 s3, s3, s5 +; GFX11-NEXT: s_cselect_b32 s4, -1, 0 +; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_add_co_u32 v0, s2, s4, v0 -; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s2 +; GFX11-NEXT: v_add_co_u32 v0, s2, s2, v0 +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s2 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_endpgm %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) @@ -436,21 +441,23 @@ define amdgpu_kernel void @s_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; SI-NEXT: s_mov_b32 s11, 0xf000 ; SI-NEXT: s_mov_b32 s10, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_add_u32 s6, s4, s6 -; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_addc_u32 s7, s5, s7 -; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1] -; SI-NEXT: v_mov_b32_e32 v2, s6 +; SI-NEXT: s_add_u32 s4, s4, s6 +; SI-NEXT: s_cselect_b64 s[12:13], -1, 0 +; SI-NEXT: s_or_b32 s6, s12, s13 +; SI-NEXT: s_cmp_lg_u32 s6, 0 +; SI-NEXT: s_addc_u32 s5, s5, s7 ; SI-NEXT: s_mov_b32 s8, s0 ; SI-NEXT: s_mov_b32 s9, s1 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: v_mov_b32_e32 v1, s5 +; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 ; SI-NEXT: s_mov_b32 s0, s2 ; SI-NEXT: s_mov_b32 s1, s3 ; SI-NEXT: s_mov_b32 s2, s10 ; SI-NEXT: s_mov_b32 s3, s11 -; SI-NEXT: v_mov_b32_e32 v3, s7 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[8:11], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -458,37 +465,37 @@ define amdgpu_kernel void @s_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: s_add_u32 s2, s4, s6 ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: s_add_u32 s0, s4, s6 -; VI-NEXT: v_mov_b32_e32 v4, s4 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_addc_u32 s1, s5, s7 -; VI-NEXT: v_mov_b32_e32 v5, s5 -; VI-NEXT: v_mov_b32_e32 v7, s1 -; VI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[4:5] -; VI-NEXT: v_mov_b32_e32 v6, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; VI-NEXT: s_cmp_lg_u64 s[0:1], 0 +; VI-NEXT: s_addc_u32 s0, s5, s7 +; VI-NEXT: v_mov_b32_e32 v4, s2 +; VI-NEXT: v_mov_b32_e32 v5, s0 +; VI-NEXT: s_cselect_b64 s[0:1], -1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[6:7] -; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-NEXT: flat_store_dwordx2 v[0:1], v[4:5] +; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; VI-NEXT: flat_store_byte v[2:3], v0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: s_uaddo_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 -; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_add_u32 s0, s12, s14 -; GFX9-NEXT: v_mov_b32_e32 v0, s12 -; GFX9-NEXT: v_mov_b32_e32 v1, s13 -; GFX9-NEXT: s_addc_u32 s1, s13, s15 -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1] -; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[8:9] -; GFX9-NEXT: global_store_byte v4, v0, s[10:11] +; GFX9-NEXT: s_add_u32 s2, s12, s14 +; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 +; GFX9-NEXT: s_addc_u32 s0, s13, s15 +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1] +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX9-NEXT: global_store_byte v2, v3, s[10:11] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: s_uaddo_i64: @@ -497,10 +504,12 @@ define amdgpu_kernel void @s_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_add_u32 s0, s12, s14 -; GFX10-NEXT: s_addc_u32 s1, s13, s15 +; GFX10-NEXT: s_cselect_b32 s1, -1, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: s_cmp_lg_u32 s1, 0 +; GFX10-NEXT: s_addc_u32 s1, s13, s15 +; GFX10-NEXT: s_cselect_b32 s0, -1, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: v_cmp_lt_u64_e64 s0, s[0:1], s[12:13] ; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX10-NEXT: global_store_byte v2, v3, s[10:11] @@ -510,12 +519,13 @@ define amdgpu_kernel void @s_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_add_u32 s6, s4, s6 -; GFX11-NEXT: s_addc_u32 s7, s5, s7 -; GFX11-NEXT: v_mov_b32_e32 v0, s6 -; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[6:7], s[4:5] -; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s7 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: s_add_u32 s4, s4, s6 +; GFX11-NEXT: s_cselect_b32 s6, -1, 0 +; GFX11-NEXT: v_mov_b32_e32 v0, s4 +; GFX11-NEXT: s_cmp_lg_u32 s6, 0 +; GFX11-NEXT: s_addc_u32 s5, s5, s7 +; GFX11-NEXT: s_cselect_b32 s4, -1, 0 +; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s5 ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] @@ -551,10 +561,10 @@ define amdgpu_kernel void @v_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; SI-NEXT: s_mov_b32 s4, s2 ; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_add_i32_e32 v2, vcc, v0, v2 -; SI-NEXT: v_addc_u32_e32 v3, vcc, v1, v3, vcc -; SI-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1] -; SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[8:11], 0 +; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm @@ -574,10 +584,9 @@ define amdgpu_kernel void @v_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; VI-NEXT: v_mov_b32_e32 v6, s2 ; VI-NEXT: v_mov_b32_e32 v7, s3 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_add_u32_e32 v2, vcc, v0, v2 -; VI-NEXT: v_addc_u32_e32 v3, vcc, v1, v3, vcc -; VI-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1] -; VI-NEXT: flat_store_dwordx2 v[4:5], v[2:3] +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; VI-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1] ; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; VI-NEXT: flat_store_byte v[6:7], v0 ; VI-NEXT: s_endpgm @@ -590,10 +599,9 @@ define amdgpu_kernel void @v_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX9-NEXT: global_load_dwordx2 v[0:1], v4, s[12:13] ; GFX9-NEXT: global_load_dwordx2 v[2:3], v4, s[14:15] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v3, vcc -; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1] -; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[8:9] +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[8:9] ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX9-NEXT: global_store_byte v4, v0, s[10:11] ; GFX9-NEXT: s_endpgm @@ -607,12 +615,11 @@ define amdgpu_kernel void @v_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[12:13] ; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[14:15] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2 -; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo -; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1] -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo -; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[8:9] -; GFX10-NEXT: global_store_byte v4, v0, s[10:11] +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10-NEXT: global_store_dwordx2 v4, v[0:1], s[8:9] +; GFX10-NEXT: global_store_byte v4, v2, s[10:11] ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: v_uaddo_i64: @@ -624,14 +631,12 @@ define amdgpu_kernel void @v_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-NEXT: global_load_b64 v[0:1], v4, s[4:5] ; GFX11-NEXT: global_load_b64 v[2:3], v4, s[6:7] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, v1, v3, vcc_lo -; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1] -; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: global_store_b64 v4, v[2:3], s[0:1] -; GFX11-NEXT: global_store_b8 v4, v0, s[2:3] +; GFX11-NEXT: global_store_b64 v4, v[0:1], s[0:1] +; GFX11-NEXT: global_store_b8 v4, v2, s[2:3] ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll index 9230174..7f89581 100644 --- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll @@ -693,52 +693,47 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) { ; GFX6-LABEL: v_uaddsat_i64: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_add_i32_e32 v2, vcc, v0, v2 -; GFX6-NEXT: v_addc_u32_e32 v3, vcc, v1, v3, vcc -; GFX6-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1] -; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, -1, vcc +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, -1, vcc ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_uaddsat_i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_add_u32_e32 v2, vcc, v0, v2 -; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v1, v3, vcc -; GFX8-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, -1, vcc +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, -1, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_uaddsat_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v3, vcc -; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, -1, vcc +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -1, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_uaddsat_i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2 -; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo -; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, -1, vcc_lo +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, -1, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_uaddsat_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, v1, v3, vcc_lo -; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, -1, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, -1, vcc_lo +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, -1, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, -1, vcc_lo ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs) ret i64 %result diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll index 1ed04f8..41199b0 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -146,8 +146,11 @@ define amdgpu_kernel void @s_test_udiv_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s14, s12, 1 -; GCN-IR-NEXT: s_addc_u32 s15, s13, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[14:15], 0 +; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GCN-IR-NEXT: s_or_b32 s8, s8, s9 +; GCN-IR-NEXT: s_cmp_lg_u32 s8, 0 +; GCN-IR-NEXT: s_addc_u32 s8, s13, 0 +; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0 ; GCN-IR-NEXT: s_sub_i32 s12, 63, s12 ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[8:9] ; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[2:3], s12 @@ -157,9 +160,9 @@ define amdgpu_kernel void @s_test_udiv_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-IR-NEXT: s_add_u32 s14, s6, -1 ; GCN-IR-NEXT: s_addc_u32 s15, s7, -1 ; GCN-IR-NEXT: s_not_b64 s[2:3], s[10:11] -; GCN-IR-NEXT: s_add_u32 s2, s2, s16 -; GCN-IR-NEXT: s_addc_u32 s3, s3, 0 -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 +; GCN-IR-NEXT: s_add_u32 s10, s2, s16 +; GCN-IR-NEXT: s_addc_u32 s11, s3, 0 +; GCN-IR-NEXT: s_mov_b64 s[2:3], 0 ; GCN-IR-NEXT: s_mov_b32 s5, 0 ; GCN-IR-NEXT: .LBB0_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -167,19 +170,22 @@ define amdgpu_kernel void @s_test_udiv_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-IR-NEXT: s_lshr_b32 s4, s9, 31 ; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[8:9], 1 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[4:5] -; GCN-IR-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] -; GCN-IR-NEXT: s_sub_u32 s4, s14, s12 -; GCN-IR-NEXT: s_subb_u32 s4, s15, s13 -; GCN-IR-NEXT: s_ashr_i32 s10, s4, 31 -; GCN-IR-NEXT: s_mov_b32 s11, s10 -; GCN-IR-NEXT: s_and_b32 s4, s10, 1 -; GCN-IR-NEXT: s_and_b64 s[10:11], s[10:11], s[6:7] -; GCN-IR-NEXT: s_sub_u32 s12, s12, s10 -; GCN-IR-NEXT: s_subb_u32 s13, s13, s11 -; GCN-IR-NEXT: s_add_u32 s2, s2, 1 -; GCN-IR-NEXT: s_addc_u32 s3, s3, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[16:17], s[2:3], 0 -; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5] +; GCN-IR-NEXT: s_or_b64 s[8:9], s[2:3], s[8:9] +; GCN-IR-NEXT: s_sub_u32 s2, s14, s12 +; GCN-IR-NEXT: s_subb_u32 s2, s15, s13 +; GCN-IR-NEXT: s_ashr_i32 s2, s2, 31 +; GCN-IR-NEXT: s_mov_b32 s3, s2 +; GCN-IR-NEXT: s_and_b32 s4, s2, 1 +; GCN-IR-NEXT: s_and_b64 s[16:17], s[2:3], s[6:7] +; GCN-IR-NEXT: s_sub_u32 s12, s12, s16 +; GCN-IR-NEXT: s_subb_u32 s13, s13, s17 +; GCN-IR-NEXT: s_add_u32 s10, s10, 1 +; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0 +; GCN-IR-NEXT: s_or_b32 s16, s16, s17 +; GCN-IR-NEXT: s_cmp_lg_u32 s16, 0 +; GCN-IR-NEXT: s_addc_u32 s11, s11, 0 +; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0 +; GCN-IR-NEXT: s_mov_b64 s[2:3], s[4:5] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-IR-NEXT: .LBB0_4: ; %Flow7 @@ -313,19 +319,19 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) { ; GCN-IR-NEXT: v_ffbh_u32_e32 v4, v2 ; GCN-IR-NEXT: v_add_i32_e64 v4, s[6:7], 32, v4 ; GCN-IR-NEXT: v_ffbh_u32_e32 v5, v3 -; GCN-IR-NEXT: v_min_u32_e32 v14, v4, v5 +; GCN-IR-NEXT: v_min_u32_e32 v8, v4, v5 ; GCN-IR-NEXT: v_ffbh_u32_e32 v4, v0 ; GCN-IR-NEXT: v_add_i32_e64 v4, s[6:7], 32, v4 ; GCN-IR-NEXT: v_ffbh_u32_e32 v5, v1 -; GCN-IR-NEXT: v_min_u32_e32 v15, v4, v5 -; GCN-IR-NEXT: v_sub_i32_e64 v8, s[6:7], v14, v15 +; GCN-IR-NEXT: v_min_u32_e32 v9, v4, v5 +; GCN-IR-NEXT: v_sub_i32_e64 v6, s[6:7], v8, v9 ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] -; GCN-IR-NEXT: v_subb_u32_e64 v9, s[6:7], 0, 0, s[6:7] -; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[6:7], 63, v[8:9] +; GCN-IR-NEXT: v_subb_u32_e64 v7, s[6:7], 0, 0, s[6:7] +; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[6:7], 63, v[6:7] ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[8:9] +; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[6:7] ; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v4, v1, 0, s[4:5] ; GCN-IR-NEXT: v_cndmask_b32_e64 v5, v0, 0, s[4:5] @@ -333,55 +339,54 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) { ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB1_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, 1, v8 -; GCN-IR-NEXT: v_addc_u32_e32 v11, vcc, 0, v9, vcc -; GCN-IR-NEXT: v_sub_i32_e64 v4, s[4:5], 63, v8 -; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] +; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, 1, v6 +; GCN-IR-NEXT: v_addc_u32_e32 v4, vcc, 0, v7, vcc +; GCN-IR-NEXT: v_sub_i32_e64 v4, s[4:5], 63, v6 ; GCN-IR-NEXT: v_lshl_b64 v[4:5], v[0:1], v4 +; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB1_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, -1, v2 -; GCN-IR-NEXT: v_lshr_b64 v[8:9], v[0:1], v10 -; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, -1, v3, vcc -; GCN-IR-NEXT: v_not_b32_e32 v0, v14 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, v0, v15 -; GCN-IR-NEXT: v_mov_b32_e32 v10, 0 -; GCN-IR-NEXT: v_addc_u32_e64 v1, s[4:5], -1, 0, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 +; GCN-IR-NEXT: v_lshr_b64 v[0:1], v[0:1], v10 +; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, -1, v2 +; GCN-IR-NEXT: v_addc_u32_e32 v11, vcc, -1, v3, vcc +; GCN-IR-NEXT: v_not_b32_e32 v6, v8 +; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, v6, v9 +; GCN-IR-NEXT: v_addc_u32_e64 v13, s[8:9], -1, 0, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 ; GCN-IR-NEXT: .LBB1_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 +; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v6, 31, v5 -; GCN-IR-NEXT: v_or_b32_e32 v8, v8, v6 +; GCN-IR-NEXT: v_or_b32_e32 v0, v0, v6 ; GCN-IR-NEXT: v_lshl_b64 v[4:5], v[4:5], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v12, v8 -; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, v13, v9, vcc -; GCN-IR-NEXT: v_or_b32_e32 v4, v10, v4 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v6 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GCN-IR-NEXT: v_or_b32_e32 v5, v11, v5 -; GCN-IR-NEXT: v_and_b32_e32 v6, 1, v10 -; GCN-IR-NEXT: v_and_b32_e32 v11, v10, v3 -; GCN-IR-NEXT: v_and_b32_e32 v10, v10, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] -; GCN-IR-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v10 -; GCN-IR-NEXT: v_subb_u32_e64 v9, s[4:5], v9, v11, s[4:5] -; GCN-IR-NEXT: v_mov_b32_e32 v11, v7 -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v10, v6 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v10, v0 +; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, v11, v1, vcc +; GCN-IR-NEXT: v_or_b32_e32 v4, v8, v4 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v8, 31, v6 +; GCN-IR-NEXT: v_or_b32_e32 v5, v9, v5 +; GCN-IR-NEXT: v_and_b32_e32 v6, 1, v8 +; GCN-IR-NEXT: v_and_b32_e32 v9, v8, v3 +; GCN-IR-NEXT: v_and_b32_e32 v8, v8, v2 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; GCN-IR-NEXT: v_subb_u32_e32 v1, vcc, v1, v9, vcc +; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, 1, v12 +; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, 0, v13, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v9, v7 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v8, v6 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB1_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB1_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB1_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[4:5], 1 ; GCN-IR-NEXT: v_or_b32_e32 v4, v7, v1 ; GCN-IR-NEXT: v_or_b32_e32 v5, v6, v0 @@ -923,34 +928,37 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_flbit_i32_b64 s12, s[2:3] -; GCN-IR-NEXT: s_add_u32 s8, s12, 0xffffffc5 +; GCN-IR-NEXT: s_flbit_i32_b64 s14, s[2:3] +; GCN-IR-NEXT: s_add_u32 s8, s14, 0xffffffc5 ; GCN-IR-NEXT: s_addc_u32 s9, 0, -1 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[8:9], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[8:9], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63 ; GCN-IR-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] ; GCN-IR-NEXT: s_and_b64 s[6:7], s[10:11], exec ; GCN-IR-NEXT: s_cselect_b32 s6, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15] +; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] ; GCN-IR-NEXT: s_mov_b32 s7, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s10, s8, 1 -; GCN-IR-NEXT: s_addc_u32 s11, s9, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[10:11], 0 +; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0 +; GCN-IR-NEXT: s_or_b32 s6, s6, s7 +; GCN-IR-NEXT: s_cmp_lg_u32 s6, 0 +; GCN-IR-NEXT: s_addc_u32 s6, s9, 0 +; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GCN-IR-NEXT: s_sub_i32 s8, 63, s8 ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7] ; GCN-IR-NEXT: s_lshl_b64 s[6:7], 24, s8 ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: s_lshr_b64 s[10:11], 24, s10 -; GCN-IR-NEXT: s_add_u32 s14, s2, -1 -; GCN-IR-NEXT: s_addc_u32 s15, s3, -1 -; GCN-IR-NEXT: s_sub_u32 s8, 58, s12 -; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 -; GCN-IR-NEXT: s_mov_b64 s[12:13], 0 +; GCN-IR-NEXT: s_add_u32 s12, s2, -1 +; GCN-IR-NEXT: s_addc_u32 s13, s3, -1 +; GCN-IR-NEXT: s_sub_u32 s14, 58, s14 +; GCN-IR-NEXT: s_subb_u32 s15, 0, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 ; GCN-IR-NEXT: s_mov_b32 s5, 0 ; GCN-IR-NEXT: .LBB8_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -958,19 +966,22 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_lshr_b32 s4, s7, 31 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 ; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5] -; GCN-IR-NEXT: s_or_b64 s[6:7], s[12:13], s[6:7] -; GCN-IR-NEXT: s_sub_u32 s4, s14, s10 -; GCN-IR-NEXT: s_subb_u32 s4, s15, s11 -; GCN-IR-NEXT: s_ashr_i32 s12, s4, 31 -; GCN-IR-NEXT: s_mov_b32 s13, s12 -; GCN-IR-NEXT: s_and_b32 s4, s12, 1 -; GCN-IR-NEXT: s_and_b64 s[12:13], s[12:13], s[2:3] -; GCN-IR-NEXT: s_sub_u32 s10, s10, s12 -; GCN-IR-NEXT: s_subb_u32 s11, s11, s13 -; GCN-IR-NEXT: s_add_u32 s8, s8, 1 -; GCN-IR-NEXT: s_addc_u32 s9, s9, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[16:17], s[8:9], 0 -; GCN-IR-NEXT: s_mov_b64 s[12:13], s[4:5] +; GCN-IR-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] +; GCN-IR-NEXT: s_sub_u32 s4, s12, s10 +; GCN-IR-NEXT: s_subb_u32 s4, s13, s11 +; GCN-IR-NEXT: s_ashr_i32 s8, s4, 31 +; GCN-IR-NEXT: s_mov_b32 s9, s8 +; GCN-IR-NEXT: s_and_b32 s4, s8, 1 +; GCN-IR-NEXT: s_and_b64 s[16:17], s[8:9], s[2:3] +; GCN-IR-NEXT: s_sub_u32 s10, s10, s16 +; GCN-IR-NEXT: s_subb_u32 s11, s11, s17 +; GCN-IR-NEXT: s_add_u32 s14, s14, 1 +; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0 +; GCN-IR-NEXT: s_or_b32 s16, s16, s17 +; GCN-IR-NEXT: s_cmp_lg_u32 s16, 0 +; GCN-IR-NEXT: s_addc_u32 s15, s15, 0 +; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], s[4:5] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] ; GCN-IR-NEXT: s_cbranch_vccz .LBB8_3 ; GCN-IR-NEXT: .LBB8_4: ; %Flow6 @@ -1094,12 +1105,12 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_ffbh_u32_e32 v2, v0 ; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 -; GCN-IR-NEXT: v_min_u32_e32 v10, v2, v3 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 0xffffffd0, v10 -; GCN-IR-NEXT: v_addc_u32_e64 v7, s[6:7], 0, -1, vcc +; GCN-IR-NEXT: v_min_u32_e32 v8, v2, v3 +; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, 0xffffffd0, v8 +; GCN-IR-NEXT: v_addc_u32_e64 v5, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] -; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[6:7] -; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[6:7] +; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[4:5] +; GCN-IR-NEXT: v_cmp_ne_u64_e64 s[6:7], 63, v[4:5] ; GCN-IR-NEXT: v_mov_b32_e32 v3, 0x8000 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], vcc ; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[4:5] @@ -1109,55 +1120,54 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB9_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 1, v6 -; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v6 -; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v7, vcc -; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 +; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v4 +; GCN-IR-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc +; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v4 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0x8000 +; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[8:9], v2 ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] -; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[4:5], v2 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[8:9] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[10:11], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[10:11] ; GCN-IR-NEXT: s_cbranch_execz .LBB9_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, -1, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, -1, v1, vcc -; GCN-IR-NEXT: v_lshr_b64 v[8:9], s[4:5], v8 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, 47, v10 -; GCN-IR-NEXT: v_mov_b32_e32 v10, 0 -; GCN-IR-NEXT: v_subb_u32_e64 v7, s[4:5], 0, 0, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 +; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, -1, v0 +; GCN-IR-NEXT: v_addc_u32_e32 v11, vcc, -1, v1, vcc +; GCN-IR-NEXT: v_sub_i32_e32 v12, vcc, 47, v8 +; GCN-IR-NEXT: v_lshr_b64 v[6:7], s[8:9], v6 +; GCN-IR-NEXT: v_subb_u32_e64 v13, s[8:9], 0, 0, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: .LBB9_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 +; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v8, v8, v4 +; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v12, v8 -; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v13, v9, vcc -; GCN-IR-NEXT: v_or_b32_e32 v2, v10, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v4 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v6 -; GCN-IR-NEXT: v_or_b32_e32 v3, v11, v3 -; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v10 -; GCN-IR-NEXT: v_and_b32_e32 v11, v10, v1 -; GCN-IR-NEXT: v_and_b32_e32 v10, v10, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] -; GCN-IR-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v10 -; GCN-IR-NEXT: v_subb_u32_e64 v9, s[4:5], v9, v11, s[4:5] -; GCN-IR-NEXT: v_mov_b32_e32 v11, v5 -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v10, v4 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v10, v6 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v11, v7, vcc +; GCN-IR-NEXT: v_or_b32_e32 v2, v8, v2 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v8, 31, v4 +; GCN-IR-NEXT: v_or_b32_e32 v3, v9, v3 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v8 +; GCN-IR-NEXT: v_and_b32_e32 v9, v8, v1 +; GCN-IR-NEXT: v_and_b32_e32 v8, v8, v0 +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v6, v8 +; GCN-IR-NEXT: v_subb_u32_e32 v7, vcc, v7, v9, vcc +; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, 1, v12 +; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, 0, v13, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v9, v5 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v8, v4 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB9_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB9_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB9_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[2:3], 1 ; GCN-IR-NEXT: v_or_b32_e32 v2, v5, v1 ; GCN-IR-NEXT: v_or_b32_e32 v3, v4, v0 @@ -1184,13 +1194,13 @@ define i64 @v_test_udiv_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: v_ffbh_u32_e32 v2, v0 ; GCN-IR-NEXT: v_add_i32_e64 v2, s[4:5], 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 -; GCN-IR-NEXT: v_min_u32_e32 v10, v2, v3 -; GCN-IR-NEXT: v_sub_i32_e64 v6, s[4:5], 48, v10 -; GCN-IR-NEXT: v_subb_u32_e64 v7, s[4:5], 0, 0, s[4:5] +; GCN-IR-NEXT: v_min_u32_e32 v6, v2, v3 +; GCN-IR-NEXT: v_sub_i32_e64 v4, s[4:5], 48, v6 +; GCN-IR-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, s[4:5] ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] -; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[6:7] +; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[4:5] ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[6:7] +; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[4:5] ; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[4:5] ; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[4:5] @@ -1198,52 +1208,51 @@ define i64 @v_test_udiv_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB10_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 1, v6 -; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v7, vcc -; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v6 -; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] +; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v4 +; GCN-IR-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc +; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[0:1], v2 +; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB10_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_lshr_b64 v[6:7], v[0:1], v8 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, 0xffffffcf, v10 -; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 -; GCN-IR-NEXT: v_addc_u32_e64 v1, s[4:5], 0, -1, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 +; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 0xffffffcf, v6 +; GCN-IR-NEXT: v_lshr_b64 v[0:1], v[0:1], v7 +; GCN-IR-NEXT: v_addc_u32_e64 v9, s[8:9], 0, -1, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff +; GCN-IR-NEXT: s_movk_i32 s10, 0x7fff ; GCN-IR-NEXT: .LBB10_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 +; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v4 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, s12, v6 +; GCN-IR-NEXT: v_or_b32_e32 v0, v0, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, 0, v7, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GCN-IR-NEXT: v_or_b32_e32 v2, v8, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v8, 31, v4 -; GCN-IR-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v8 -; GCN-IR-NEXT: v_and_b32_e32 v8, 0x8000, v8 -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] -; GCN-IR-NEXT: v_or_b32_e32 v3, v9, v3 -; GCN-IR-NEXT: v_sub_i32_e64 v6, s[4:5], v6, v8 -; GCN-IR-NEXT: v_mov_b32_e32 v9, v5 -; GCN-IR-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v7, s[4:5] -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v8, v4 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, s10, v0 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, 0, v1, vcc +; GCN-IR-NEXT: v_or_b32_e32 v2, v6, v2 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v6, 31, v4 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v6 +; GCN-IR-NEXT: v_and_b32_e32 v6, 0x8000, v6 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; GCN-IR-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 1, v8 +; GCN-IR-NEXT: v_or_b32_e32 v3, v7, v3 +; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v7, v5 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v6, v4 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB10_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB10_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB10_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[2:3], 1 ; GCN-IR-NEXT: v_or_b32_e32 v2, v5, v1 ; GCN-IR-NEXT: v_or_b32_e32 v3, v4, v0 @@ -1290,52 +1299,58 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_flbit_i32_b64 s12, s[2:3] -; GCN-IR-NEXT: s_sub_u32 s8, 59, s12 +; GCN-IR-NEXT: s_flbit_i32_b64 s10, s[2:3] +; GCN-IR-NEXT: s_sub_u32 s8, 59, s10 ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[6:7], s[8:9], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[8:9], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_b64 s[6:7], s[4:5], exec ; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3 ; GCN-IR-NEXT: s_cselect_b32 s6, 0, s2 -; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[12:13] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB11_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s10, s8, 1 -; GCN-IR-NEXT: s_addc_u32 s11, s9, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[10:11], 0 +; GCN-IR-NEXT: s_add_u32 s11, s8, 1 +; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0 +; GCN-IR-NEXT: s_or_b32 s6, s6, s7 +; GCN-IR-NEXT: s_cmp_lg_u32 s6, 0 +; GCN-IR-NEXT: s_addc_u32 s6, s9, 0 +; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GCN-IR-NEXT: s_sub_i32 s8, 63, s8 ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7] ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[2:3], s8 ; GCN-IR-NEXT: s_cbranch_vccz .LBB11_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[8:9], s[2:3], s10 -; GCN-IR-NEXT: s_add_u32 s2, s12, 0xffffffc4 -; GCN-IR-NEXT: s_addc_u32 s3, 0, -1 -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 +; GCN-IR-NEXT: s_lshr_b64 s[2:3], s[2:3], s11 +; GCN-IR-NEXT: s_add_u32 s10, s10, 0xffffffc4 +; GCN-IR-NEXT: s_addc_u32 s11, 0, -1 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 ; GCN-IR-NEXT: s_mov_b32 s5, 0 ; GCN-IR-NEXT: .LBB11_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[8:9], 1 +; GCN-IR-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 ; GCN-IR-NEXT: s_lshr_b32 s4, s7, 31 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 -; GCN-IR-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5] -; GCN-IR-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7] -; GCN-IR-NEXT: s_sub_u32 s4, 23, s8 -; GCN-IR-NEXT: s_subb_u32 s4, 0, s9 -; GCN-IR-NEXT: s_ashr_i32 s10, s4, 31 -; GCN-IR-NEXT: s_and_b32 s4, s10, 1 -; GCN-IR-NEXT: s_and_b32 s10, s10, 24 -; GCN-IR-NEXT: s_sub_u32 s8, s8, s10 -; GCN-IR-NEXT: s_subb_u32 s9, s9, 0 -; GCN-IR-NEXT: s_add_u32 s2, s2, 1 -; GCN-IR-NEXT: s_addc_u32 s3, s3, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 0 -; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5] +; GCN-IR-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] +; GCN-IR-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] +; GCN-IR-NEXT: s_sub_u32 s4, 23, s2 +; GCN-IR-NEXT: s_subb_u32 s4, 0, s3 +; GCN-IR-NEXT: s_ashr_i32 s8, s4, 31 +; GCN-IR-NEXT: s_and_b32 s4, s8, 1 +; GCN-IR-NEXT: s_and_b32 s8, s8, 24 +; GCN-IR-NEXT: s_sub_u32 s2, s2, s8 +; GCN-IR-NEXT: s_subb_u32 s3, s3, 0 +; GCN-IR-NEXT: s_add_u32 s10, s10, 1 +; GCN-IR-NEXT: s_cselect_b64 s[12:13], -1, 0 +; GCN-IR-NEXT: s_or_b32 s12, s12, s13 +; GCN-IR-NEXT: s_cmp_lg_u32 s12, 0 +; GCN-IR-NEXT: s_addc_u32 s11, s11, 0 +; GCN-IR-NEXT: s_cselect_b64 s[12:13], -1, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], s[4:5] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[12:13] ; GCN-IR-NEXT: s_cbranch_vccz .LBB11_3 ; GCN-IR-NEXT: .LBB11_4: ; %Flow6 @@ -1384,13 +1399,13 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) { ; GCN-IR-NEXT: v_ffbh_u32_e32 v2, v0 ; GCN-IR-NEXT: v_add_i32_e64 v2, s[4:5], 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 -; GCN-IR-NEXT: v_min_u32_e32 v10, v2, v3 -; GCN-IR-NEXT: v_sub_i32_e64 v6, s[4:5], 59, v10 -; GCN-IR-NEXT: v_subb_u32_e64 v7, s[4:5], 0, 0, s[4:5] +; GCN-IR-NEXT: v_min_u32_e32 v6, v2, v3 +; GCN-IR-NEXT: v_sub_i32_e64 v4, s[4:5], 59, v6 +; GCN-IR-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, s[4:5] ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] -; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[6:7] +; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[4:5] ; GCN-IR-NEXT: s_or_b64 s[4:5], vcc, s[4:5] -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[6:7] +; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 63, v[4:5] ; GCN-IR-NEXT: s_xor_b64 s[6:7], s[4:5], -1 ; GCN-IR-NEXT: v_cndmask_b32_e64 v2, v1, 0, s[4:5] ; GCN-IR-NEXT: v_cndmask_b32_e64 v3, v0, 0, s[4:5] @@ -1398,51 +1413,50 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) { ; GCN-IR-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 1, v6 -; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v7, vcc -; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v6 -; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] +; GCN-IR-NEXT: v_add_i32_e32 v7, vcc, 1, v4 +; GCN-IR-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc +; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[0:1], v2 +; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB12_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_lshr_b64 v[6:7], v[0:1], v8 -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, 0xffffffc4, v10 -; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 -; GCN-IR-NEXT: v_addc_u32_e64 v1, s[4:5], 0, -1, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 +; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 0xffffffc4, v6 +; GCN-IR-NEXT: v_lshr_b64 v[0:1], v[0:1], v7 +; GCN-IR-NEXT: v_addc_u32_e64 v9, s[8:9], 0, -1, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: .LBB12_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 +; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v4 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, 23, v6 +; GCN-IR-NEXT: v_or_b32_e32 v0, v0, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, 0, v7, vcc -; GCN-IR-NEXT: v_add_i32_e32 v0, vcc, 1, v0 -; GCN-IR-NEXT: v_or_b32_e32 v2, v8, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v8, 31, v4 -; GCN-IR-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v8 -; GCN-IR-NEXT: v_and_b32_e32 v8, 24, v8 -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] -; GCN-IR-NEXT: v_or_b32_e32 v3, v9, v3 -; GCN-IR-NEXT: v_sub_i32_e64 v6, s[4:5], v6, v8 -; GCN-IR-NEXT: v_mov_b32_e32 v9, v5 -; GCN-IR-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v7, s[4:5] -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v8, v4 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, 23, v0 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, 0, v1, vcc +; GCN-IR-NEXT: v_or_b32_e32 v2, v6, v2 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v6, 31, v4 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v6 +; GCN-IR-NEXT: v_and_b32_e32 v6, 24, v6 +; GCN-IR-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; GCN-IR-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 1, v8 +; GCN-IR-NEXT: v_or_b32_e32 v3, v7, v3 +; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v7, v5 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v6, v4 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB12_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB12_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB12_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[0:1], v[2:3], 1 ; GCN-IR-NEXT: v_or_b32_e32 v2, v5, v1 ; GCN-IR-NEXT: v_or_b32_e32 v3, v4, v0 diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll index b846ce7..cdcc914 100644 --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -170,35 +170,38 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[6:7], 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[2:3], 0 ; GCN-IR-NEXT: s_flbit_i32_b64 s10, s[6:7] -; GCN-IR-NEXT: s_flbit_i32_b64 s18, s[2:3] +; GCN-IR-NEXT: s_flbit_i32_b64 s16, s[2:3] ; GCN-IR-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] -; GCN-IR-NEXT: s_sub_u32 s12, s10, s18 +; GCN-IR-NEXT: s_sub_u32 s12, s10, s16 ; GCN-IR-NEXT: s_subb_u32 s13, 0, 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[14:15], s[12:13], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[16:17], s[12:13], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[12:13], 63 ; GCN-IR-NEXT: s_or_b64 s[14:15], s[8:9], s[14:15] ; GCN-IR-NEXT: s_and_b64 s[8:9], s[14:15], exec ; GCN-IR-NEXT: s_cselect_b32 s9, 0, s3 ; GCN-IR-NEXT: s_cselect_b32 s8, 0, s2 -; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] +; GCN-IR-NEXT: s_or_b64 s[14:15], s[14:15], s[18:19] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[14:15] ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s14, s12, 1 -; GCN-IR-NEXT: s_addc_u32 s15, s13, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[8:9], s[14:15], 0 +; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GCN-IR-NEXT: s_or_b32 s8, s8, s9 +; GCN-IR-NEXT: s_cmp_lg_u32 s8, 0 +; GCN-IR-NEXT: s_addc_u32 s8, s13, 0 +; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0 ; GCN-IR-NEXT: s_sub_i32 s12, 63, s12 ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[8:9] ; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[2:3], s12 ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: s_lshr_b64 s[12:13], s[2:3], s14 -; GCN-IR-NEXT: s_add_u32 s16, s6, -1 -; GCN-IR-NEXT: s_addc_u32 s17, s7, -1 +; GCN-IR-NEXT: s_add_u32 s14, s6, -1 +; GCN-IR-NEXT: s_addc_u32 s15, s7, -1 ; GCN-IR-NEXT: s_not_b64 s[4:5], s[10:11] -; GCN-IR-NEXT: s_add_u32 s10, s4, s18 -; GCN-IR-NEXT: s_addc_u32 s11, s5, 0 -; GCN-IR-NEXT: s_mov_b64 s[14:15], 0 +; GCN-IR-NEXT: s_add_u32 s16, s4, s16 +; GCN-IR-NEXT: s_addc_u32 s17, s5, 0 +; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 ; GCN-IR-NEXT: s_mov_b32 s5, 0 ; GCN-IR-NEXT: .LBB0_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -206,19 +209,22 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-IR-NEXT: s_lshr_b32 s4, s9, 31 ; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[8:9], 1 ; GCN-IR-NEXT: s_or_b64 s[12:13], s[12:13], s[4:5] -; GCN-IR-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9] -; GCN-IR-NEXT: s_sub_u32 s4, s16, s12 -; GCN-IR-NEXT: s_subb_u32 s4, s17, s13 -; GCN-IR-NEXT: s_ashr_i32 s14, s4, 31 -; GCN-IR-NEXT: s_mov_b32 s15, s14 -; GCN-IR-NEXT: s_and_b32 s4, s14, 1 -; GCN-IR-NEXT: s_and_b64 s[14:15], s[14:15], s[6:7] -; GCN-IR-NEXT: s_sub_u32 s12, s12, s14 -; GCN-IR-NEXT: s_subb_u32 s13, s13, s15 -; GCN-IR-NEXT: s_add_u32 s10, s10, 1 -; GCN-IR-NEXT: s_addc_u32 s11, s11, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[18:19], s[10:11], 0 -; GCN-IR-NEXT: s_mov_b64 s[14:15], s[4:5] +; GCN-IR-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] +; GCN-IR-NEXT: s_sub_u32 s4, s14, s12 +; GCN-IR-NEXT: s_subb_u32 s4, s15, s13 +; GCN-IR-NEXT: s_ashr_i32 s10, s4, 31 +; GCN-IR-NEXT: s_mov_b32 s11, s10 +; GCN-IR-NEXT: s_and_b32 s4, s10, 1 +; GCN-IR-NEXT: s_and_b64 s[18:19], s[10:11], s[6:7] +; GCN-IR-NEXT: s_sub_u32 s12, s12, s18 +; GCN-IR-NEXT: s_subb_u32 s13, s13, s19 +; GCN-IR-NEXT: s_add_u32 s16, s16, 1 +; GCN-IR-NEXT: s_cselect_b64 s[18:19], -1, 0 +; GCN-IR-NEXT: s_or_b32 s18, s18, s19 +; GCN-IR-NEXT: s_cmp_lg_u32 s18, 0 +; GCN-IR-NEXT: s_addc_u32 s17, s17, 0 +; GCN-IR-NEXT: s_cselect_b64 s[18:19], -1, 0 +; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[18:19] ; GCN-IR-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-IR-NEXT: .LBB0_4: ; %Flow7 @@ -362,12 +368,12 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) { ; GCN-IR-NEXT: v_ffbh_u32_e32 v4, v2 ; GCN-IR-NEXT: v_add_i32_e64 v4, s[6:7], 32, v4 ; GCN-IR-NEXT: v_ffbh_u32_e32 v5, v3 -; GCN-IR-NEXT: v_min_u32_e32 v12, v4, v5 +; GCN-IR-NEXT: v_min_u32_e32 v10, v4, v5 ; GCN-IR-NEXT: v_ffbh_u32_e32 v4, v0 ; GCN-IR-NEXT: v_add_i32_e64 v4, s[6:7], 32, v4 ; GCN-IR-NEXT: v_ffbh_u32_e32 v5, v1 -; GCN-IR-NEXT: v_min_u32_e32 v13, v4, v5 -; GCN-IR-NEXT: v_sub_i32_e64 v4, s[6:7], v12, v13 +; GCN-IR-NEXT: v_min_u32_e32 v11, v4, v5 +; GCN-IR-NEXT: v_sub_i32_e64 v4, s[6:7], v10, v11 ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3] ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_subb_u32_e64 v5, s[6:7], 0, 0, s[6:7] @@ -383,54 +389,53 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) { ; GCN-IR-NEXT: s_cbranch_execz .LBB1_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 1, v4 -; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc +; GCN-IR-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v4, s[4:5], 63, v4 -; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] ; GCN-IR-NEXT: v_lshl_b64 v[4:5], v[0:1], v4 +; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB1_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, -1, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, -1, v3, vcc -; GCN-IR-NEXT: v_not_b32_e32 v6, v12 -; GCN-IR-NEXT: v_lshr_b64 v[10:11], v[0:1], v8 -; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, v6, v13 -; GCN-IR-NEXT: v_mov_b32_e32 v12, 0 -; GCN-IR-NEXT: v_addc_u32_e64 v9, s[4:5], -1, 0, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v13, 0 +; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, -1, v2 +; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, -1, v3, vcc +; GCN-IR-NEXT: v_not_b32_e32 v6, v10 +; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, v6, v11 +; GCN-IR-NEXT: v_lshr_b64 v[8:9], v[0:1], v8 +; GCN-IR-NEXT: v_addc_u32_e64 v15, s[8:9], -1, 0, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v10, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 ; GCN-IR-NEXT: .LBB1_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[10:11], v[10:11], 1 +; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v6, 31, v5 -; GCN-IR-NEXT: v_or_b32_e32 v10, v10, v6 +; GCN-IR-NEXT: v_or_b32_e32 v8, v8, v6 ; GCN-IR-NEXT: v_lshl_b64 v[4:5], v[4:5], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v14, v10 -; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, v15, v11, vcc -; GCN-IR-NEXT: v_or_b32_e32 v4, v12, v4 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v6 -; GCN-IR-NEXT: v_add_i32_e32 v8, vcc, 1, v8 -; GCN-IR-NEXT: v_or_b32_e32 v5, v13, v5 -; GCN-IR-NEXT: v_and_b32_e32 v6, 1, v12 -; GCN-IR-NEXT: v_and_b32_e32 v13, v12, v3 -; GCN-IR-NEXT: v_and_b32_e32 v12, v12, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] -; GCN-IR-NEXT: v_sub_i32_e64 v10, s[4:5], v10, v12 -; GCN-IR-NEXT: v_subb_u32_e64 v11, s[4:5], v11, v13, s[4:5] -; GCN-IR-NEXT: v_mov_b32_e32 v13, v7 -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v12, v6 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v12, v8 +; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, v13, v9, vcc +; GCN-IR-NEXT: v_or_b32_e32 v4, v10, v4 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v6 +; GCN-IR-NEXT: v_or_b32_e32 v5, v11, v5 +; GCN-IR-NEXT: v_and_b32_e32 v6, 1, v10 +; GCN-IR-NEXT: v_and_b32_e32 v11, v10, v3 +; GCN-IR-NEXT: v_and_b32_e32 v10, v10, v2 +; GCN-IR-NEXT: v_sub_i32_e32 v8, vcc, v8, v10 +; GCN-IR-NEXT: v_subb_u32_e32 v9, vcc, v9, v11, vcc +; GCN-IR-NEXT: v_add_i32_e32 v14, vcc, 1, v14 +; GCN-IR-NEXT: v_addc_u32_e32 v15, vcc, 0, v15, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v11, v7 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v10, v6 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB1_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB1_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB1_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[4:5], v[4:5], 1 ; GCN-IR-NEXT: v_or_b32_e32 v7, v7, v5 ; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v4 @@ -948,34 +953,37 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_flbit_i32_b64 s12, s[2:3] -; GCN-IR-NEXT: s_add_u32 s8, s12, 0xffffffc5 +; GCN-IR-NEXT: s_flbit_i32_b64 s14, s[2:3] +; GCN-IR-NEXT: s_add_u32 s8, s14, 0xffffffc5 ; GCN-IR-NEXT: s_addc_u32 s9, 0, -1 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[10:11], s[8:9], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[8:9], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63 ; GCN-IR-NEXT: s_or_b64 s[10:11], s[6:7], s[10:11] ; GCN-IR-NEXT: s_and_b64 s[6:7], s[10:11], exec ; GCN-IR-NEXT: s_cselect_b32 s6, 0, 24 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15] +; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[10:11] ; GCN-IR-NEXT: s_mov_b32 s7, 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB6_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: s_add_u32 s10, s8, 1 -; GCN-IR-NEXT: s_addc_u32 s11, s9, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[10:11], 0 +; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0 +; GCN-IR-NEXT: s_or_b32 s6, s6, s7 +; GCN-IR-NEXT: s_cmp_lg_u32 s6, 0 +; GCN-IR-NEXT: s_addc_u32 s6, s9, 0 +; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GCN-IR-NEXT: s_sub_i32 s8, 63, s8 ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7] ; GCN-IR-NEXT: s_lshl_b64 s[6:7], 24, s8 ; GCN-IR-NEXT: s_cbranch_vccz .LBB6_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader ; GCN-IR-NEXT: s_lshr_b64 s[10:11], 24, s10 -; GCN-IR-NEXT: s_add_u32 s14, s2, -1 -; GCN-IR-NEXT: s_addc_u32 s15, s3, -1 -; GCN-IR-NEXT: s_sub_u32 s8, 58, s12 -; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 -; GCN-IR-NEXT: s_mov_b64 s[12:13], 0 +; GCN-IR-NEXT: s_add_u32 s12, s2, -1 +; GCN-IR-NEXT: s_addc_u32 s13, s3, -1 +; GCN-IR-NEXT: s_sub_u32 s14, 58, s14 +; GCN-IR-NEXT: s_subb_u32 s15, 0, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 ; GCN-IR-NEXT: s_mov_b32 s5, 0 ; GCN-IR-NEXT: .LBB6_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -983,19 +991,22 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_lshr_b32 s4, s7, 31 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 ; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5] -; GCN-IR-NEXT: s_or_b64 s[6:7], s[12:13], s[6:7] -; GCN-IR-NEXT: s_sub_u32 s4, s14, s10 -; GCN-IR-NEXT: s_subb_u32 s4, s15, s11 -; GCN-IR-NEXT: s_ashr_i32 s12, s4, 31 -; GCN-IR-NEXT: s_mov_b32 s13, s12 -; GCN-IR-NEXT: s_and_b32 s4, s12, 1 -; GCN-IR-NEXT: s_and_b64 s[12:13], s[12:13], s[2:3] -; GCN-IR-NEXT: s_sub_u32 s10, s10, s12 -; GCN-IR-NEXT: s_subb_u32 s11, s11, s13 -; GCN-IR-NEXT: s_add_u32 s8, s8, 1 -; GCN-IR-NEXT: s_addc_u32 s9, s9, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[16:17], s[8:9], 0 -; GCN-IR-NEXT: s_mov_b64 s[12:13], s[4:5] +; GCN-IR-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] +; GCN-IR-NEXT: s_sub_u32 s4, s12, s10 +; GCN-IR-NEXT: s_subb_u32 s4, s13, s11 +; GCN-IR-NEXT: s_ashr_i32 s8, s4, 31 +; GCN-IR-NEXT: s_mov_b32 s9, s8 +; GCN-IR-NEXT: s_and_b32 s4, s8, 1 +; GCN-IR-NEXT: s_and_b64 s[16:17], s[8:9], s[2:3] +; GCN-IR-NEXT: s_sub_u32 s10, s10, s16 +; GCN-IR-NEXT: s_subb_u32 s11, s11, s17 +; GCN-IR-NEXT: s_add_u32 s14, s14, 1 +; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0 +; GCN-IR-NEXT: s_or_b32 s16, s16, s17 +; GCN-IR-NEXT: s_cmp_lg_u32 s16, 0 +; GCN-IR-NEXT: s_addc_u32 s15, s15, 0 +; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], s[4:5] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[16:17] ; GCN-IR-NEXT: s_cbranch_vccz .LBB6_3 ; GCN-IR-NEXT: .LBB6_4: ; %Flow6 @@ -1064,52 +1075,58 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR: ; %bb.0: ; %_udiv-special-cases ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) -; GCN-IR-NEXT: s_flbit_i32_b64 s12, s[2:3] -; GCN-IR-NEXT: s_sub_u32 s8, 59, s12 +; GCN-IR-NEXT: s_flbit_i32_b64 s10, s[2:3] +; GCN-IR-NEXT: s_sub_u32 s8, 59, s10 ; GCN-IR-NEXT: s_subb_u32 s9, 0, 0 ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], s[2:3], 0 ; GCN-IR-NEXT: v_cmp_gt_u64_e64 s[6:7], s[8:9], 63 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[10:11], s[8:9], 63 +; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[12:13], s[8:9], 63 ; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] ; GCN-IR-NEXT: s_and_b64 s[6:7], s[4:5], exec ; GCN-IR-NEXT: s_cselect_b32 s7, 0, s3 ; GCN-IR-NEXT: s_cselect_b32 s6, 0, s2 -; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[10:11] +; GCN-IR-NEXT: s_or_b64 s[4:5], s[4:5], s[12:13] ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GCN-IR-NEXT: s_mov_b64 s[4:5], 0 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_5 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 -; GCN-IR-NEXT: s_add_u32 s10, s8, 1 -; GCN-IR-NEXT: s_addc_u32 s11, s9, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[6:7], s[10:11], 0 +; GCN-IR-NEXT: s_add_u32 s11, s8, 1 +; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0 +; GCN-IR-NEXT: s_or_b32 s6, s6, s7 +; GCN-IR-NEXT: s_cmp_lg_u32 s6, 0 +; GCN-IR-NEXT: s_addc_u32 s6, s9, 0 +; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GCN-IR-NEXT: s_sub_i32 s8, 63, s8 ; GCN-IR-NEXT: s_andn2_b64 vcc, exec, s[6:7] ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[2:3], s8 ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_4 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: s_lshr_b64 s[10:11], s[2:3], s10 -; GCN-IR-NEXT: s_add_u32 s8, s12, 0xffffffc4 -; GCN-IR-NEXT: s_addc_u32 s9, 0, -1 -; GCN-IR-NEXT: s_mov_b64 s[12:13], 0 +; GCN-IR-NEXT: s_lshr_b64 s[8:9], s[2:3], s11 +; GCN-IR-NEXT: s_add_u32 s12, s10, 0xffffffc4 +; GCN-IR-NEXT: s_addc_u32 s13, 0, -1 +; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 ; GCN-IR-NEXT: s_mov_b32 s5, 0 ; GCN-IR-NEXT: .LBB7_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: s_lshl_b64 s[10:11], s[10:11], 1 +; GCN-IR-NEXT: s_lshl_b64 s[8:9], s[8:9], 1 ; GCN-IR-NEXT: s_lshr_b32 s4, s7, 31 ; GCN-IR-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 -; GCN-IR-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5] -; GCN-IR-NEXT: s_or_b64 s[6:7], s[12:13], s[6:7] -; GCN-IR-NEXT: s_sub_u32 s4, 23, s10 -; GCN-IR-NEXT: s_subb_u32 s4, 0, s11 -; GCN-IR-NEXT: s_ashr_i32 s12, s4, 31 -; GCN-IR-NEXT: s_and_b32 s4, s12, 1 -; GCN-IR-NEXT: s_and_b32 s12, s12, 24 -; GCN-IR-NEXT: s_sub_u32 s10, s10, s12 -; GCN-IR-NEXT: s_subb_u32 s11, s11, 0 -; GCN-IR-NEXT: s_add_u32 s8, s8, 1 -; GCN-IR-NEXT: s_addc_u32 s9, s9, 0 -; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[14:15], s[8:9], 0 -; GCN-IR-NEXT: s_mov_b64 s[12:13], s[4:5] +; GCN-IR-NEXT: s_or_b64 s[8:9], s[8:9], s[4:5] +; GCN-IR-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7] +; GCN-IR-NEXT: s_sub_u32 s4, 23, s8 +; GCN-IR-NEXT: s_subb_u32 s4, 0, s9 +; GCN-IR-NEXT: s_ashr_i32 s10, s4, 31 +; GCN-IR-NEXT: s_and_b32 s4, s10, 1 +; GCN-IR-NEXT: s_and_b32 s10, s10, 24 +; GCN-IR-NEXT: s_sub_u32 s8, s8, s10 +; GCN-IR-NEXT: s_subb_u32 s9, s9, 0 +; GCN-IR-NEXT: s_add_u32 s12, s12, 1 +; GCN-IR-NEXT: s_cselect_b64 s[14:15], -1, 0 +; GCN-IR-NEXT: s_or_b32 s14, s14, s15 +; GCN-IR-NEXT: s_cmp_lg_u32 s14, 0 +; GCN-IR-NEXT: s_addc_u32 s13, s13, 0 +; GCN-IR-NEXT: s_cselect_b64 s[14:15], -1, 0 +; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5] ; GCN-IR-NEXT: s_and_b64 vcc, exec, s[14:15] ; GCN-IR-NEXT: s_cbranch_vccz .LBB7_3 ; GCN-IR-NEXT: .LBB7_4: ; %Flow6 @@ -1241,8 +1258,8 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: v_ffbh_u32_e32 v2, v0 ; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 -; GCN-IR-NEXT: v_min_u32_e32 v10, v2, v3 -; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, 0xffffffd0, v10 +; GCN-IR-NEXT: v_min_u32_e32 v8, v2, v3 +; GCN-IR-NEXT: v_add_i32_e32 v2, vcc, 0xffffffd0, v8 ; GCN-IR-NEXT: v_addc_u32_e64 v3, s[6:7], 0, -1, vcc ; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[2:3] @@ -1257,54 +1274,53 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { ; GCN-IR-NEXT: s_cbranch_execz .LBB8_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2 +; GCN-IR-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc -; GCN-IR-NEXT: s_mov_b64 s[4:5], 0x8000 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0x8000 +; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[8:9], v2 ; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] -; GCN-IR-NEXT: v_lshl_b64 v[2:3], s[4:5], v2 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[8:9] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[10:11], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[10:11] ; GCN-IR-NEXT: s_cbranch_execz .LBB8_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, -1, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, -1, v1, vcc -; GCN-IR-NEXT: v_lshr_b64 v[8:9], s[4:5], v6 -; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, 47, v10 -; GCN-IR-NEXT: v_mov_b32_e32 v10, 0 -; GCN-IR-NEXT: v_subb_u32_e64 v7, s[4:5], 0, 0, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 +; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, -1, v0 +; GCN-IR-NEXT: v_addc_u32_e32 v11, vcc, -1, v1, vcc +; GCN-IR-NEXT: v_sub_i32_e32 v12, vcc, 47, v8 +; GCN-IR-NEXT: v_lshr_b64 v[6:7], s[8:9], v6 +; GCN-IR-NEXT: v_subb_u32_e64 v13, s[8:9], 0, 0, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: .LBB8_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 +; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v8, v8, v4 +; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v12, v8 -; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v13, v9, vcc -; GCN-IR-NEXT: v_or_b32_e32 v2, v10, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v4 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v6 -; GCN-IR-NEXT: v_or_b32_e32 v3, v11, v3 -; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v10 -; GCN-IR-NEXT: v_and_b32_e32 v11, v10, v1 -; GCN-IR-NEXT: v_and_b32_e32 v10, v10, v0 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] -; GCN-IR-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v10 -; GCN-IR-NEXT: v_subb_u32_e64 v9, s[4:5], v9, v11, s[4:5] -; GCN-IR-NEXT: v_mov_b32_e32 v11, v5 -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v10, v4 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, v10, v6 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, v11, v7, vcc +; GCN-IR-NEXT: v_or_b32_e32 v2, v8, v2 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v8, 31, v4 +; GCN-IR-NEXT: v_or_b32_e32 v3, v9, v3 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v8 +; GCN-IR-NEXT: v_and_b32_e32 v9, v8, v1 +; GCN-IR-NEXT: v_and_b32_e32 v8, v8, v0 +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v6, v8 +; GCN-IR-NEXT: v_subb_u32_e32 v7, vcc, v7, v9, vcc +; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, 1, v12 +; GCN-IR-NEXT: v_addc_u32_e32 v13, vcc, 0, v13, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v9, v5 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v8, v4 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB8_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB8_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB8_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 ; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v3 ; GCN-IR-NEXT: v_or_b32_e32 v4, v4, v2 @@ -1337,8 +1353,8 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: v_ffbh_u32_e32 v2, v0 ; GCN-IR-NEXT: v_add_i32_e64 v2, s[4:5], 32, v2 ; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1 -; GCN-IR-NEXT: v_min_u32_e32 v10, v2, v3 -; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 48, v10 +; GCN-IR-NEXT: v_min_u32_e32 v8, v2, v3 +; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 48, v8 ; GCN-IR-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, s[4:5] ; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] ; GCN-IR-NEXT: v_cmp_lt_u64_e64 s[4:5], 63, v[2:3] @@ -1352,51 +1368,50 @@ define i64 @v_test_urem_pow2_k_den_i64(i64 %x) { ; GCN-IR-NEXT: s_cbranch_execz .LBB9_6 ; GCN-IR-NEXT: ; %bb.1: ; %udiv-bb1 ; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v2 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v3, vcc +; GCN-IR-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; GCN-IR-NEXT: v_sub_i32_e64 v2, s[4:5], 63, v2 -; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 -; GCN-IR-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[0:1], v2 +; GCN-IR-NEXT: v_mov_b32_e32 v4, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_and_saveexec_b64 s[4:5], vcc -; GCN-IR-NEXT: s_xor_b64 s[8:9], exec, s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], vcc, -1 +; GCN-IR-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] +; GCN-IR-NEXT: s_xor_b64 s[4:5], exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execz .LBB9_5 ; GCN-IR-NEXT: ; %bb.2: ; %udiv-preheader -; GCN-IR-NEXT: v_lshr_b64 v[8:9], v[0:1], v6 -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 0xffffffcf, v10 -; GCN-IR-NEXT: v_mov_b32_e32 v10, 0 -; GCN-IR-NEXT: v_addc_u32_e64 v7, s[4:5], 0, -1, vcc -; GCN-IR-NEXT: s_mov_b64 s[10:11], 0 -; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 +; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, 0xffffffcf, v8 +; GCN-IR-NEXT: v_lshr_b64 v[6:7], v[0:1], v6 +; GCN-IR-NEXT: v_addc_u32_e64 v11, s[8:9], 0, -1, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v8, 0 +; GCN-IR-NEXT: s_mov_b64 s[8:9], 0 +; GCN-IR-NEXT: v_mov_b32_e32 v9, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 -; GCN-IR-NEXT: s_movk_i32 s12, 0x7fff +; GCN-IR-NEXT: s_movk_i32 s10, 0x7fff ; GCN-IR-NEXT: .LBB9_3: ; %udiv-do-while ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-IR-NEXT: v_lshl_b64 v[8:9], v[8:9], 1 +; GCN-IR-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v8, v8, v4 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, s12, v8 +; GCN-IR-NEXT: v_or_b32_e32 v6, v6, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, 0, v9, vcc -; GCN-IR-NEXT: v_add_i32_e32 v6, vcc, 1, v6 -; GCN-IR-NEXT: v_or_b32_e32 v2, v10, v2 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v4 -; GCN-IR-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc -; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v10 -; GCN-IR-NEXT: v_and_b32_e32 v10, 0x8000, v10 -; GCN-IR-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] -; GCN-IR-NEXT: v_or_b32_e32 v3, v11, v3 -; GCN-IR-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v10 -; GCN-IR-NEXT: v_mov_b32_e32 v11, v5 -; GCN-IR-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v9, s[4:5] -; GCN-IR-NEXT: s_or_b64 s[10:11], vcc, s[10:11] -; GCN-IR-NEXT: v_mov_b32_e32 v10, v4 -; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[10:11] +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, s10, v6 +; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, 0, v7, vcc +; GCN-IR-NEXT: v_or_b32_e32 v2, v8, v2 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v8, 31, v4 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v8 +; GCN-IR-NEXT: v_and_b32_e32 v8, 0x8000, v8 +; GCN-IR-NEXT: v_sub_i32_e32 v6, vcc, v6, v8 +; GCN-IR-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v7, vcc +; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, 1, v10 +; GCN-IR-NEXT: v_or_b32_e32 v3, v9, v3 +; GCN-IR-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc +; GCN-IR-NEXT: v_mov_b32_e32 v9, v5 +; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] +; GCN-IR-NEXT: v_mov_b32_e32 v8, v4 +; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] ; GCN-IR-NEXT: s_cbranch_execnz .LBB9_3 ; GCN-IR-NEXT: ; %bb.4: ; %Flow -; GCN-IR-NEXT: s_or_b64 exec, exec, s[10:11] -; GCN-IR-NEXT: .LBB9_5: ; %Flow4 ; GCN-IR-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-IR-NEXT: .LBB9_5: ; %Flow4 +; GCN-IR-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 ; GCN-IR-NEXT: v_or_b32_e32 v5, v5, v3 ; GCN-IR-NEXT: v_or_b32_e32 v4, v4, v2 diff --git a/llvm/test/CodeGen/AMDGPU/usubo.ll b/llvm/test/CodeGen/AMDGPU/usubo.ll index 0289dab..d67a7b1 100644 --- a/llvm/test/CodeGen/AMDGPU/usubo.ll +++ b/llvm/test/CodeGen/AMDGPU/usubo.ll @@ -14,15 +14,16 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_sub_u32 s0, s2, s8 -; SI-NEXT: v_mov_b32_e32 v0, s2 +; SI-NEXT: s_sub_u32 s2, s2, s8 ; SI-NEXT: s_mov_b32 s5, s1 -; SI-NEXT: s_subb_u32 s1, s3, s9 +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: s_or_b32 s0, s0, s1 +; SI-NEXT: s_cmp_lg_u32 s0, 0 +; SI-NEXT: s_subb_u32 s3, s3, s9 +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; SI-NEXT: v_mov_b32_e32 v1, s3 -; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1] -; SI-NEXT: v_mov_b32_e32 v1, s1 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; SI-NEXT: v_add_i32_e32 v0, vcc, s0, v0 +; SI-NEXT: v_add_i32_e32 v0, vcc, s2, v0 ; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm @@ -33,15 +34,15 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: s_sub_u32 s0, s2, s4 -; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: s_sub_u32 s2, s2, s4 ; VI-NEXT: v_mov_b32_e32 v1, s1 +; VI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; VI-NEXT: s_cmp_lg_u64 s[0:1], 0 +; VI-NEXT: s_subb_u32 s3, s3, s5 +; VI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: s_subb_u32 s1, s3, s5 -; VI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[2:3] -; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 +; VI-NEXT: v_add_u32_e32 v2, vcc, s2, v2 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; VI-NEXT: s_endpgm @@ -52,14 +53,14 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: s_sub_u32 s4, s2, s6 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: s_subb_u32 s5, s3, s7 -; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, s[4:5], v[0:1] -; GFX9-NEXT: v_mov_b32_e32 v1, s5 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0 +; GFX9-NEXT: s_sub_u32 s6, s2, s6 +; GFX9-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GFX9-NEXT: s_cmp_lg_u64 s[4:5], 0 +; GFX9-NEXT: s_subb_u32 s4, s3, s7 +; GFX9-NEXT: s_cselect_b64 s[2:3], -1, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3] +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s6, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm @@ -71,12 +72,14 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_sub_u32 s4, s2, s6 -; GFX10-NEXT: s_subb_u32 s5, s3, s7 -; GFX10-NEXT: v_cmp_gt_u64_e64 s2, s[4:5], s[2:3] -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 -; GFX10-NEXT: v_add_co_u32 v0, s2, s4, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s2, s5, 0, s2 +; GFX10-NEXT: s_sub_u32 s2, s2, s6 +; GFX10-NEXT: s_cselect_b32 s4, -1, 0 +; GFX10-NEXT: s_cmp_lg_u32 s4, 0 +; GFX10-NEXT: s_subb_u32 s3, s3, s7 +; GFX10-NEXT: s_cselect_b32 s4, -1, 0 +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX10-NEXT: v_add_co_u32 v0, s2, s2, v0 +; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s2, s3, 0, s2 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm ; @@ -87,14 +90,16 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 % ; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_sub_u32 s4, s2, s4 -; GFX11-NEXT: s_subb_u32 s5, s3, s5 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cmp_gt_u64_e64 s2, s[4:5], s[2:3] -; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX11-NEXT: s_sub_u32 s2, s2, s4 +; GFX11-NEXT: s_cselect_b32 s4, -1, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11-NEXT: s_subb_u32 s3, s3, s5 +; GFX11-NEXT: s_cselect_b32 s4, -1, 0 +; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_add_co_u32 v0, s2, s4, v0 -; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s2 +; GFX11-NEXT: v_add_co_u32 v0, s2, s2, v0 +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s3, 0, s2 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_endpgm %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) #0 @@ -435,21 +440,23 @@ define amdgpu_kernel void @s_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; SI-NEXT: s_mov_b32 s11, 0xf000 ; SI-NEXT: s_mov_b32 s10, -1 ; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: s_sub_u32 s6, s4, s6 -; SI-NEXT: v_mov_b32_e32 v0, s4 -; SI-NEXT: s_subb_u32 s7, s5, s7 -; SI-NEXT: v_mov_b32_e32 v1, s5 -; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1] -; SI-NEXT: v_mov_b32_e32 v2, s6 +; SI-NEXT: s_sub_u32 s4, s4, s6 +; SI-NEXT: s_cselect_b64 s[12:13], -1, 0 +; SI-NEXT: s_or_b32 s6, s12, s13 +; SI-NEXT: s_cmp_lg_u32 s6, 0 +; SI-NEXT: s_subb_u32 s5, s5, s7 ; SI-NEXT: s_mov_b32 s8, s0 ; SI-NEXT: s_mov_b32 s9, s1 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: v_mov_b32_e32 v1, s5 +; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 ; SI-NEXT: s_mov_b32 s0, s2 ; SI-NEXT: s_mov_b32 s1, s3 ; SI-NEXT: s_mov_b32 s2, s10 ; SI-NEXT: s_mov_b32 s3, s11 -; SI-NEXT: v_mov_b32_e32 v3, s7 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[8:11], 0 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; SI-NEXT: s_waitcnt expcnt(0) +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -457,37 +464,37 @@ define amdgpu_kernel void @s_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 ; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: s_sub_u32 s2, s4, s6 ; VI-NEXT: v_mov_b32_e32 v0, s0 -; VI-NEXT: s_sub_u32 s0, s4, s6 -; VI-NEXT: v_mov_b32_e32 v4, s4 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: s_subb_u32 s1, s5, s7 -; VI-NEXT: v_mov_b32_e32 v5, s5 -; VI-NEXT: v_mov_b32_e32 v7, s1 -; VI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[4:5] -; VI-NEXT: v_mov_b32_e32 v6, s0 -; VI-NEXT: v_mov_b32_e32 v2, s2 +; VI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; VI-NEXT: s_cmp_lg_u64 s[0:1], 0 +; VI-NEXT: s_subb_u32 s0, s5, s7 +; VI-NEXT: v_mov_b32_e32 v4, s2 +; VI-NEXT: v_mov_b32_e32 v5, s0 +; VI-NEXT: s_cselect_b64 s[0:1], -1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s3 -; VI-NEXT: flat_store_dwordx2 v[0:1], v[6:7] -; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-NEXT: flat_store_dwordx2 v[0:1], v[4:5] +; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; VI-NEXT: flat_store_byte v[2:3], v0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: s_usubo_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 -; GFX9-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_sub_u32 s0, s12, s14 -; GFX9-NEXT: v_mov_b32_e32 v0, s12 -; GFX9-NEXT: v_mov_b32_e32 v1, s13 -; GFX9-NEXT: s_subb_u32 s1, s13, s15 -; GFX9-NEXT: v_mov_b32_e32 v3, s1 -; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1] -; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[8:9] -; GFX9-NEXT: global_store_byte v4, v0, s[10:11] +; GFX9-NEXT: s_sub_u32 s2, s12, s14 +; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0 +; GFX9-NEXT: s_subb_u32 s0, s13, s15 +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1] +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] +; GFX9-NEXT: global_store_byte v2, v3, s[10:11] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: s_usubo_i64: @@ -496,10 +503,12 @@ define amdgpu_kernel void @s_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_sub_u32 s0, s12, s14 -; GFX10-NEXT: s_subb_u32 s1, s13, s15 +; GFX10-NEXT: s_cselect_b32 s1, -1, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: s_cmp_lg_u32 s1, 0 +; GFX10-NEXT: s_subb_u32 s1, s13, s15 +; GFX10-NEXT: s_cselect_b32 s0, -1, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: v_cmp_gt_u64_e64 s0, s[0:1], s[12:13] ; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0 ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX10-NEXT: global_store_byte v2, v3, s[10:11] @@ -509,12 +518,13 @@ define amdgpu_kernel void @s_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_sub_u32 s6, s4, s6 -; GFX11-NEXT: s_subb_u32 s7, s5, s7 -; GFX11-NEXT: v_mov_b32_e32 v0, s6 -; GFX11-NEXT: v_cmp_gt_u64_e64 s4, s[6:7], s[4:5] -; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s7 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: s_sub_u32 s4, s4, s6 +; GFX11-NEXT: s_cselect_b32 s6, -1, 0 +; GFX11-NEXT: v_mov_b32_e32 v0, s4 +; GFX11-NEXT: s_cmp_lg_u32 s6, 0 +; GFX11-NEXT: s_subb_u32 s5, s5, s7 +; GFX11-NEXT: s_cselect_b32 s4, -1, 0 +; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s5 ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] @@ -550,10 +560,10 @@ define amdgpu_kernel void @v_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; SI-NEXT: s_mov_b32 s4, s2 ; SI-NEXT: s_mov_b32 s5, s3 ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_sub_i32_e32 v2, vcc, v0, v2 -; SI-NEXT: v_subb_u32_e32 v3, vcc, v1, v3, vcc -; SI-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] -; SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[8:11], 0 +; SI-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; SI-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0 +; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm @@ -573,10 +583,9 @@ define amdgpu_kernel void @v_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; VI-NEXT: v_mov_b32_e32 v6, s2 ; VI-NEXT: v_mov_b32_e32 v7, s3 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_sub_u32_e32 v2, vcc, v0, v2 -; VI-NEXT: v_subb_u32_e32 v3, vcc, v1, v3, vcc -; VI-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] -; VI-NEXT: flat_store_dwordx2 v[4:5], v[2:3] +; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v2 +; VI-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1] ; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; VI-NEXT: flat_store_byte v[6:7], v0 ; VI-NEXT: s_endpgm @@ -589,10 +598,9 @@ define amdgpu_kernel void @v_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX9-NEXT: global_load_dwordx2 v[0:1], v4, s[12:13] ; GFX9-NEXT: global_load_dwordx2 v[2:3], v4, s[14:15] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2 -; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc -; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] -; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[8:9] +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[8:9] ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX9-NEXT: global_store_byte v4, v0, s[10:11] ; GFX9-NEXT: s_endpgm @@ -606,12 +614,11 @@ define amdgpu_kernel void @v_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[12:13] ; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[14:15] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2 -; GFX10-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo -; GFX10-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1] -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo -; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[8:9] -; GFX10-NEXT: global_store_byte v4, v0, s[10:11] +; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10-NEXT: global_store_dwordx2 v4, v[0:1], s[8:9] +; GFX10-NEXT: global_store_byte v4, v2, s[10:11] ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: v_usubo_i64: @@ -623,14 +630,12 @@ define amdgpu_kernel void @v_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) % ; GFX11-NEXT: global_load_b64 v[0:1], v4, s[4:5] ; GFX11-NEXT: global_load_b64 v[2:3], v4, s[6:7] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo -; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1] -; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: global_store_b64 v4, v[2:3], s[0:1] -; GFX11-NEXT: global_store_b8 v4, v0, s[2:3] +; GFX11-NEXT: global_store_b64 v4, v[0:1], s[0:1] +; GFX11-NEXT: global_store_b8 v4, v2, s[2:3] ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll index 90491a0..3ddb2f0 100644 --- a/llvm/test/CodeGen/AMDGPU/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll @@ -730,52 +730,38 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) { ; GFX6-LABEL: v_usubsat_i64: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v0, v2 -; GFX6-NEXT: v_subb_u32_e32 v3, vcc, v1, v3, vcc -; GFX6-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] -; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc +; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_usubsat_i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v0, v2 -; GFX8-NEXT: v_subb_u32_e32 v3, vcc, v1, v3, vcc -; GFX8-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] -; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_usubsat_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2 -; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc -; GFX9-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1] -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_usubsat_i64: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2 -; GFX10-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo -; GFX10-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1] -; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_usubsat_i64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2 -; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo -; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1] -; GFX11-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10PLUS-LABEL: v_usubsat_i64: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX10PLUS-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc_lo +; GFX10PLUS-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc_lo +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %result = call i64 @llvm.usub.sat.i64(i64 %lhs, i64 %rhs) ret i64 %result } diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor-Invalid-Flags_V1.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor-Invalid-Flags_V1.ll new file mode 100644 index 0000000..610ce4f --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor-Invalid-Flags_V1.ll @@ -0,0 +1,18 @@ +; RUN: not opt -passes='print<dxil-root-signature>' %s -S -o - 2>&1 | FileCheck %s +; On Version 1, the only valid flag is DataVolatile (2). +target triple = "dxil-unknown-shadermodel6.0-compute" + + +; CHECK: error: Invalid value for RootDescriptorFlag: 4 +; CHECK-NOT: Root Signature Definitions +define void @main() #0 { +entry: + ret void +} +attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + + +!dx.rootsignatures = !{!2} ; list of function/root signature pairs +!2 = !{ ptr @main, !3, i32 1 } ; function, root signature +!3 = !{ !5 } ; list of root signature elements +!5 = !{ !"RootCBV", i32 0, i32 1, i32 2, i32 4 } diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers-Invalid-Flag_V1.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers-Invalid-Flag_V1.ll new file mode 100644 index 0000000..76b60b8 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers-Invalid-Flag_V1.ll @@ -0,0 +1,19 @@ +; RUN: not opt -passes='print<dxil-root-signature>' %s -S -o - 2>&1 | FileCheck %s + + +target triple = "dxil-unknown-shadermodel6.0-compute" + +; CHECK: error: Invalid value for Static Sampler Flag: 1 +; CHECK-NOT: Root Signature Definitions + +define void @main() #0 { +entry: + ret void +} +attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + + +!dx.rootsignatures = !{!2} ; list of function/root signature pairs +!2 = !{ ptr @main, !3, i32 1 } ; function, root signature +!3 = !{ !5 } ; list of root signature elements +!5 = !{ !"StaticSampler", i32 4, i32 2, i32 3, i32 5, float 0x3FF6CCCCC0000000, i32 9, i32 3, i32 2, float -1.280000e+02, float 1.280000e+02, i32 42, i32 0, i32 0, i32 1 } diff --git a/llvm/test/CodeGen/Hexagon/fmul-v67.ll b/llvm/test/CodeGen/Hexagon/fmul-v67.ll index 49098cd..fc0b7f7 100644 --- a/llvm/test/CodeGen/Hexagon/fmul-v67.ll +++ b/llvm/test/CodeGen/Hexagon/fmul-v67.ll @@ -29,7 +29,7 @@ b2: ; CHECK: [[R22]] += dfmpylh([[R20]],[[R21]]) ; CHECK: [[R22]] += dfmpylh([[R21]],[[R20]]) ; CHECK: [[R22]] += dfmpyhh([[R20]],[[R21]]) -define double @test_02(double %a0, double %a1) #2 { +define double @test_02(double %a0, double %a1) #1 { b2: %v3 = fmul double %a0, %a1 ret double %v3 @@ -40,13 +40,11 @@ b2: ; CHECK: [[R30]] += dfmpylh(r1:0,r3:2) ; CHECK: [[R30]] += dfmpylh(r3:2,r1:0) ; CHECK: [[R30]] += dfmpyhh(r1:0,r3:2) -define double @test_03(double %a0, double %a1) #3 { +define double @test_03(double %a0, double %a1) #1 { b2: - %v3 = fmul double %a0, %a1 + %v3 = fmul afn double %a0, %a1 ret double %v3 } attributes #0 = { nounwind } attributes #1 = { nounwind "target-cpu"="hexagonv67" } -attributes #2 = { nounwind "target-cpu"="hexagonv67" "unsafe-fp-math"="false" } -attributes #3 = { nounwind "target-cpu"="hexagonv67" "unsafe-fp-math"="true" } diff --git a/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll index 1da516a..80b4048 100644 --- a/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll +++ b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll @@ -1,15 +1,15 @@ ; REQUIRES: x86_64-linux -; RUN: not llc -o /dev/null -print-mir2vec-vocab %s 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID -; RUN: not llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_zero_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-ZERO-DIM -; RUN: not llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_invalid_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ENTITIES -; RUN: not llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_inconsistent_dims.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-INCONSISTENT-DIMS +; RUN: llc -o /dev/null -print-mir2vec-vocab %s 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID +; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_zero_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-ZERO-DIM +; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_invalid_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ENTITIES +; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_inconsistent_dims.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-INCONSISTENT-DIMS define dso_local void @test() { entry: ret void } -; CHECK-INVALID: error: MIR2Vec vocabulary file path not specified; set it using --mir2vec-vocab-path -; CHECK-ZERO-DIM: error: Dimension of 'entities' section of the vocabulary is zero -; CHECK-NO-ENTITIES: error: Missing 'entities' section in vocabulary file -; CHECK-INCONSISTENT-DIMS: error: All vectors in the 'entities' section of the vocabulary are not of the same dimension +; CHECK-INVALID: MIR2Vec Vocabulary Printer: Failed to get vocabulary - MIR2Vec vocabulary file path not specified; set it using --mir2vec-vocab-path +; CHECK-ZERO-DIM: MIR2Vec Vocabulary Printer: Failed to get vocabulary - Dimension of 'entities' section of the vocabulary is zero +; CHECK-NO-ENTITIES: MIR2Vec Vocabulary Printer: Failed to get vocabulary - Missing 'entities' section in vocabulary file +; CHECK-INCONSISTENT-DIMS: MIR2Vec Vocabulary Printer: Failed to get vocabulary - All vectors in the 'entities' section of the vocabulary are not of the same dimension diff --git a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll index 02118fb..b503da4 100644 --- a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll +++ b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll @@ -72,7 +72,7 @@ define internal void @bar() { ; CHECK-NEXT: [[OFFSET:%.*]] = ashr exact i64 [[TMP2]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr addrspace(1) [[BEGIN]], i64 [[OFFSET]] ; CHECK-NEXT: [[START:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[TMP3]], i64 -1 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt ptr addrspace(1) [[START]], [[BEGIN]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp uge ptr addrspace(1) [[START]], [[BEGIN]] ; CHECK-NEXT: br i1 [[TMP4]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] ; CHECK: while.entry: ; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[START]], [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll b/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll index 41a0e81..1edb387 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll @@ -12,63 +12,104 @@ declare void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols) declare void @llvm.nvvm.tcgen05.alloc.shared.cg1(ptr addrspace(3) %addr, i32 %ncols) declare void @llvm.nvvm.tcgen05.alloc.shared.cg2(ptr addrspace(3) %addr, i32 %ncols) -; CHECK-LABEL: test_tcgen05_alloc -define void @test_tcgen05_alloc(ptr %addr, i32 %ncols) { -; CHECK_PTX64-LABEL: test_tcgen05_alloc( +define void @test_tcgen05_alloc_cg1(ptr %addr, i32 %ncols) { +; CHECK_PTX64-LABEL: test_tcgen05_alloc_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-NEXT: .reg .b32 %r<2>; ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: -; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_param_0]; -; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_param_1]; +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg1_param_0]; +; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg1_param_1]; ; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.b32 [%rd1], %r1; -; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>; ; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: -; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_param_0]; -; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_param_1]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg1_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg1_param_1]; ; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.b32 [%rd1], %r1; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.alloc.cg1(ptr %addr, i32 %ncols) - call void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols) + ret void +} +define void @test_tcgen05_alloc_cg2(ptr %addr, i32 %ncols) { +; CHECK_PTX64-LABEL: test_tcgen05_alloc_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-NEXT: .reg .b32 %r<2>; +; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg2_param_0]; +; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg2_param_1]; +; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>; +; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg2_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg2_param_1]; +; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1; +; CHECK_PTX64_SHARED32-NEXT: ret; + call void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols) ret void } -; CHECK-LABEL: test_tcgen05_alloc_shared -define void @test_tcgen05_alloc_shared(ptr addrspace(3) %addr, i32 %ncols) { -; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared( +define void @test_tcgen05_alloc_shared_cg1(ptr addrspace(3) %addr, i32 %ncols) { +; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-NEXT: .reg .b32 %r<2>; ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: -; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_param_0]; -; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_param_1]; +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_cg1_param_0]; +; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg1_param_1]; ; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.shared::cta.b32 [%rd1], %r1; -; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%rd1], %r1; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>; ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: -; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_param_0]; -; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_param_1]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg1_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_cg1_param_1]; ; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.shared::cta.b32 [%r1], %r2; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%r1], %r2; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.alloc.shared.cg1(ptr addrspace(3) %addr, i32 %ncols) + ret void +} +define void @test_tcgen05_alloc_shared_cg2(ptr addrspace(3) %addr, i32 %ncols) { +; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-NEXT: .reg .b32 %r<2>; +; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_cg2_param_0]; +; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg2_param_1]; +; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%rd1], %r1; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>; +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg2_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_cg2_param_1]; +; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%r1], %r2; +; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.alloc.shared.cg2(ptr addrspace(3) %addr, i32 %ncols) ret void } @@ -76,31 +117,50 @@ define void @test_tcgen05_alloc_shared(ptr addrspace(3) %addr, i32 %ncols) { declare void @llvm.nvvm.tcgen05.dealloc.cg1(ptr addrspace(6) %tmem_addr, i32 %ncols) declare void @llvm.nvvm.tcgen05.dealloc.cg2(ptr addrspace(6) %tmem_addr, i32 %ncols) -; CHECK-LABEL: test_tcgen05_dealloc -define void @test_tcgen05_dealloc(ptr addrspace(6) %tmem_addr, i32 %ncols) { -; CHECK_PTX64-LABEL: test_tcgen05_dealloc( +define void @test_tcgen05_dealloc_cg1(ptr addrspace(6) %tmem_addr, i32 %ncols) { +; CHECK_PTX64-LABEL: test_tcgen05_dealloc_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-NEXT: .reg .b32 %r<3>; ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: -; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_param_0]; -; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_param_1]; +; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg1_param_0]; +; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg1_param_1]; ; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::1.sync.aligned.b32 %r1, %r2; -; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>; ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: -; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_param_0]; -; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_param_1]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg1_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg1_param_1]; ; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::1.sync.aligned.b32 %r1, %r2; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.dealloc.cg1(ptr addrspace(6) %tmem_addr, i32 %ncols) + ret void +} +define void @test_tcgen05_dealloc_cg2(ptr addrspace(6) %tmem_addr, i32 %ncols) { +; CHECK_PTX64-LABEL: test_tcgen05_dealloc_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-NEXT: .reg .b32 %r<3>; +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg2_param_0]; +; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg2_param_1]; +; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>; +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg2_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg2_param_1]; +; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2; +; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.dealloc.cg2(ptr addrspace(6) %tmem_addr, i32 %ncols) ret void } @@ -108,27 +168,42 @@ define void @test_tcgen05_dealloc(ptr addrspace(6) %tmem_addr, i32 %ncols) { declare void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1() declare void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2() -; CHECK-LABEL: test_tcgen05_relinquish_alloc_permit -define void @test_tcgen05_relinquish_alloc_permit() { -; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit( +define void @test_tcgen05_relinquish_alloc_permit_cg1() { +; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: ; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::1.sync.aligned; -; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: ; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::1.sync.aligned; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1() + ret void +} +define void @test_tcgen05_relinquish_alloc_permit_cg2() { +; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned; +; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2() ret void } diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll b/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll index 7981feb..2e80c4c 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-commit.ll @@ -11,57 +11,93 @@ declare void @llvm.nvvm.tcgen05.commit.cg2(ptr %bar_addr) declare void @llvm.nvvm.tcgen05.commit.shared.cg1(ptr addrspace(3) %bar_addr) declare void @llvm.nvvm.tcgen05.commit.shared.cg2(ptr addrspace(3) %bar_addr) -; CHECK-LABEL: test_tcgen05_commit -define void @test_tcgen05_commit(ptr %bar_addr) { -; CHECK_PTX64-LABEL: test_tcgen05_commit( +define void @test_tcgen05_commit_cg1(ptr %bar_addr) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: -; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_param_0]; +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg1_param_0]; ; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; -; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: -; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg1_param_0]; ; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.cg1(ptr %bar_addr) + ret void +} + +define void @test_tcgen05_commit_cg2(ptr %bar_addr) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg2_param_0]; +; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_cg2_param_0]; +; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; +; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.cg2(ptr %bar_addr) ret void } -; CHECK-LABEL: test_tcgen05_commit_shared -define void @test_tcgen05_commit_shared(ptr addrspace(3) %bar_addr) { -; CHECK_PTX64-LABEL: test_tcgen05_commit_shared( +define void @test_tcgen05_commit_shared_cg1(ptr addrspace(3) %bar_addr) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_shared_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: -; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_shared_param_0]; +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_shared_cg1_param_0]; ; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; -; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_shared( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_shared_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>; ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: -; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_shared_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_shared_cg1_param_0]; ; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.b64 [%r1]; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%r1]; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.shared.cg1(ptr addrspace(3) %bar_addr) + ret void +} + +define void @test_tcgen05_commit_shared_cg2(ptr addrspace(3) %bar_addr) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_shared_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_shared_cg2_param_0]; +; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%rd1]; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_shared_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>; +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_shared_cg2_param_0]; +; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.b64 [%r1]; +; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.shared.cg2(ptr addrspace(3) %bar_addr) ret void @@ -72,66 +108,106 @@ declare void @llvm.nvvm.tcgen05.commit.mc.cg2(ptr %bar_addr, i16 %cta_mask) declare void @llvm.nvvm.tcgen05.commit.mc.shared.cg1(ptr addrspace(3) %bar_addr, i16 %cta_mask) declare void @llvm.nvvm.tcgen05.commit.mc.shared.cg2(ptr addrspace(3) %bar_addr, i16 %cta_mask) -; CHECK-LABEL: test_tcgen05_commit_mc -define void @test_tcgen05_commit_mc(ptr %bar_addr, i16 %cta_mask) { -; CHECK_PTX64-LABEL: test_tcgen05_commit_mc( +define void @test_tcgen05_commit_mc_cg1(ptr %bar_addr, i16 %cta_mask) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-NEXT: .reg .b16 %rs<2>; ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: -; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_param_0]; -; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_param_1]; +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_cg1_param_0]; +; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_cg1_param_1]; ; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; -; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-NEXT: .reg .b16 %rs<2>; ; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: -; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_param_0]; -; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_param_1]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_cg1_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_cg1_param_1]; ; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.mc.cg1(ptr %bar_addr, i16 %cta_mask) + ret void +} +define void @test_tcgen05_commit_mc_cg2(ptr %bar_addr, i16 %cta_mask) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-NEXT: .reg .b16 %rs<2>; +; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_cg2_param_0]; +; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_cg2_param_1]; +; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-NEXT: .reg .b16 %rs<2>; +; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_cg2_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_cg2_param_1]; +; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; +; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.mc.cg2(ptr %bar_addr, i16 %cta_mask) - ret void } -; CHECK-LABEL: test_tcgen05_commit_mc_shared -define void @test_tcgen05_commit_mc_shared(ptr addrspace(3) %bar_addr, i16 %cta_mask) { -; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_shared( +define void @test_tcgen05_commit_mc_shared_cg1(ptr addrspace(3) %bar_addr, i16 %cta_mask) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_shared_cg1( ; CHECK_PTX64: { ; CHECK_PTX64-NEXT: .reg .b16 %rs<2>; ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; ; CHECK_PTX64-EMPTY: ; CHECK_PTX64-NEXT: // %bb.0: -; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_shared_param_0]; -; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_param_1]; +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_shared_cg1_param_0]; +; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_cg1_param_1]; ; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; -; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; ; CHECK_PTX64-NEXT: ret; ; -; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_shared( +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_shared_cg1( ; CHECK_PTX64_SHARED32: { ; CHECK_PTX64_SHARED32-NEXT: .reg .b16 %rs<2>; ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>; ; CHECK_PTX64_SHARED32-EMPTY: ; CHECK_PTX64_SHARED32-NEXT: // %bb.0: -; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_mc_shared_param_0]; -; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_param_1]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_mc_shared_cg1_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_cg1_param_1]; ; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::1.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%r1], %rs1; -; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%r1], %rs1; ; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.mc.shared.cg1(ptr addrspace(3) %bar_addr, i16 %cta_mask) + ret void +} +define void @test_tcgen05_commit_mc_shared_cg2(ptr addrspace(3) %bar_addr, i16 %cta_mask) { +; CHECK_PTX64-LABEL: test_tcgen05_commit_mc_shared_cg2( +; CHECK_PTX64: { +; CHECK_PTX64-NEXT: .reg .b16 %rs<2>; +; CHECK_PTX64-NEXT: .reg .b64 %rd<2>; +; CHECK_PTX64-EMPTY: +; CHECK_PTX64-NEXT: // %bb.0: +; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_commit_mc_shared_cg2_param_0]; +; CHECK_PTX64-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_cg2_param_1]; +; CHECK_PTX64-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%rd1], %rs1; +; CHECK_PTX64-NEXT: ret; +; +; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_commit_mc_shared_cg2( +; CHECK_PTX64_SHARED32: { +; CHECK_PTX64_SHARED32-NEXT: .reg .b16 %rs<2>; +; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>; +; CHECK_PTX64_SHARED32-EMPTY: +; CHECK_PTX64_SHARED32-NEXT: // %bb.0: +; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_commit_mc_shared_cg2_param_0]; +; CHECK_PTX64_SHARED32-NEXT: ld.param.b16 %rs1, [test_tcgen05_commit_mc_shared_cg2_param_1]; +; CHECK_PTX64_SHARED32-NEXT: tcgen05.commit.cta_group::2.mbarrier::arrive::one.shared::cluster.multicast::cluster.b64 [%r1], %rs1; +; CHECK_PTX64_SHARED32-NEXT: ret; call void @llvm.nvvm.tcgen05.commit.mc.shared.cg2(ptr addrspace(3) %bar_addr, i16 %cta_mask) - ret void } diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll b/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll index c540f78..817b1d5 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-cp.ll @@ -4,346 +4,580 @@ ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mcpu=sm_100a -mattr=+ptx86 | %ptxas-verify -arch=sm_100a %} ; RUN: %if ptxas-sm_103a && ptxas-isa-8.8 %{ llc < %s -march=nvptx64 -mcpu=sm_103a -mattr=+ptx88 | %ptxas-verify -arch=sm_103a %} -; CHECK-LABEL: test_tcgen05_cp_64x128_v1 -define void @test_tcgen05_cp_64x128_v1(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_64x128_v1( +define void @test_tcgen05_cp_64x128_v1_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v1_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::02_13 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_64x128_v1_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v1_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_64x128_v2 -define void @test_tcgen05_cp_64x128_v2(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_64x128_v2( +define void @test_tcgen05_cp_64x128_v2_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v2_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::01_23 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_64x128_v2_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v2_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_32x128 -define void @test_tcgen05_cp_32x128(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_32x128( +define void @test_tcgen05_cp_32x128_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_32x128_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.32x128b.warpx4 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_32x128_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_32x128_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_128x128b -define void @test_tcgen05_cp_128x128b(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_128x128b( +define void @test_tcgen05_cp_128x128b_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x128b_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.128x128b [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x128b.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_128x128b_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x128b_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x128b.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_128x256b -define void @test_tcgen05_cp_128x256b(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_128x256b( +define void @test_tcgen05_cp_128x256b_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x256b_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.128x256b [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x256b.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_128x256b_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x256b_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x256b.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_4x256b -define void @test_tcgen05_cp_4x256b(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_4x256b( +define void @test_tcgen05_cp_4x256b_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_4x256b_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.4x256b [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.4x256b.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_4x256b_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_4x256b_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.4x256b.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } ; With src_fmt as b6x16_p32 -; CHECK-LABEL: test_tcgen05_cp_128x256b_b6x16_p32 -define void @test_tcgen05_cp_128x256b_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_128x256b_b6x16_p32( +define void @test_tcgen05_cp_128x256b_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x256b_b6x16_p32_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b6x16_p32_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b6x16_p32_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b6x16_p32_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b6x16_p32_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.128x256b.b8x16.b6x16_p32 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b.b8x16.b6x16_p32 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x256b.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_128x256b_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x256b_b6x16_p32_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b6x16_p32_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b6x16_p32_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b.b8x16.b6x16_p32 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x256b.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_4x256b_b6x16_p32 -define void @test_tcgen05_cp_4x256b_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_4x256b_b6x16_p32( +define void @test_tcgen05_cp_4x256b_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_4x256b_b6x16_p32_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b6x16_p32_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b6x16_p32_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b6x16_p32_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b6x16_p32_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.4x256b.b8x16.b6x16_p32 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b.b8x16.b6x16_p32 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.4x256b.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_4x256b_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_4x256b_b6x16_p32_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b6x16_p32_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b6x16_p32_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b.b8x16.b6x16_p32 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.4x256b.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_128x128b_b6x16_p32 -define void @test_tcgen05_cp_128x128b_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_128x128b_b6x16_p32( +define void @test_tcgen05_cp_128x128b_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x128b_b6x16_p32_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b6x16_p32_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b6x16_p32_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b6x16_p32_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b6x16_p32_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.128x128b.b8x16.b6x16_p32 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b.b8x16.b6x16_p32 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x128b.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_128x128b_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x128b_b6x16_p32_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b6x16_p32_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b6x16_p32_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b.b8x16.b6x16_p32 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x128b.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b6x16_p32 -define void @test_tcgen05_cp_64x128_v1_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b6x16_p32( +define void @test_tcgen05_cp_64x128_v1_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b6x16_p32_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b6x16_p32_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b6x16_p32_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::02_13.b8x16.b6x16_p32 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b6x16_p32 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_64x128_v1_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b6x16_p32_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b6x16_p32_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b6x16_p32 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b6x16_p32 -define void @test_tcgen05_cp_64x128_v2_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b6x16_p32( +define void @test_tcgen05_cp_64x128_v2_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b6x16_p32_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b6x16_p32_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b6x16_p32_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::01_23.b8x16.b6x16_p32 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b6x16_p32 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_64x128_v2_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b6x16_p32_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b6x16_p32_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b6x16_p32 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_32x128_b6x16_p32 -define void @test_tcgen05_cp_32x128_b6x16_p32(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_32x128_b6x16_p32( +define void @test_tcgen05_cp_32x128_b6x16_p32_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_32x128_b6x16_p32_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b6x16_p32_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b6x16_p32_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b6x16_p32_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b6x16_p32_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.32x128b.warpx4.b8x16.b6x16_p32 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b6x16_p32 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b6x16_p32.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_32x128_b6x16_p32_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_32x128_b6x16_p32_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b6x16_p32_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b6x16_p32_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b6x16_p32 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b6x16_p32.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } ; With src_fmt as b4x16_p64 -; CHECK-LABEL: test_tcgen05_cp_128x256b_b4x16_p64 -define void @test_tcgen05_cp_128x256b_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_128x256b_b4x16_p64( +define void @test_tcgen05_cp_128x256b_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x256b_b4x16_p64_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b4x16_p64_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b4x16_p64_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b4x16_p64_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b4x16_p64_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.128x256b.b8x16.b4x16_p64 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b.b8x16.b4x16_p64 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x256b.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_128x256b_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x256b_b4x16_p64_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x256b_b4x16_p64_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x256b_b4x16_p64_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.128x256b.b8x16.b4x16_p64 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x256b.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_4x256b_b4x16_p64 -define void @test_tcgen05_cp_4x256b_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_4x256b_b4x16_p64( +define void @test_tcgen05_cp_4x256b_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_4x256b_b4x16_p64_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b4x16_p64_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b4x16_p64_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b4x16_p64_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b4x16_p64_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.4x256b.b8x16.b4x16_p64 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b.b8x16.b4x16_p64 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.4x256b.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_4x256b_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_4x256b_b4x16_p64_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_4x256b_b4x16_p64_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_4x256b_b4x16_p64_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.4x256b.b8x16.b4x16_p64 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.4x256b.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_128x128b_b4x16_p64 -define void @test_tcgen05_cp_128x128b_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_128x128b_b4x16_p64( +define void @test_tcgen05_cp_128x128b_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x128b_b4x16_p64_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b4x16_p64_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b4x16_p64_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b4x16_p64_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b4x16_p64_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.128x128b.b8x16.b4x16_p64 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b.b8x16.b4x16_p64 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x128b.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_128x128b_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_128x128b_b4x16_p64_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_128x128b_b4x16_p64_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_128x128b_b4x16_p64_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.128x128b.b8x16.b4x16_p64 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.128x128b.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b4x16_p64 -define void @test_tcgen05_cp_64x128_v1_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b4x16_p64( +define void @test_tcgen05_cp_64x128_v1_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b4x16_p64_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b4x16_p64_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b4x16_p64_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::02_13.b8x16.b4x16_p64 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b4x16_p64 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_64x128_v1_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v1_b4x16_p64_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v1_b4x16_p64_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::02_13.b8x16.b4x16_p64 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_02_13.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b4x16_p64 -define void @test_tcgen05_cp_64x128_v2_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b4x16_p64( +define void @test_tcgen05_cp_64x128_v2_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b4x16_p64_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b4x16_p64_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b4x16_p64_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.64x128b.warpx2::01_23.b8x16.b4x16_p64 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b4x16_p64 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_64x128_v2_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_64x128_v2_b4x16_p64_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_64x128_v2_b4x16_p64_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.64x128b.warpx2::01_23.b8x16.b4x16_p64 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.64x128b_warpx2_01_23.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void } -; CHECK-LABEL: test_tcgen05_cp_32x128_b4x16_p64 -define void @test_tcgen05_cp_32x128_b4x16_p64(ptr addrspace(6) %addr, i64 %sdesc) { -; CHECK-LABEL: test_tcgen05_cp_32x128_b4x16_p64( +define void @test_tcgen05_cp_32x128_b4x16_p64_cg1(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_32x128_b4x16_p64_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b4x16_p64_param_0]; -; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b4x16_p64_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b4x16_p64_cg1_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b4x16_p64_cg1_param_1]; ; CHECK-NEXT: tcgen05.cp.cta_group::1.32x128b.warpx4.b8x16.b4x16_p64 [%r1], %rd1; -; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b4x16_p64 [%r1], %rd1; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b4x16_p64.cg1(ptr addrspace(6) %addr, i64 %sdesc) + + ret void +} + +define void @test_tcgen05_cp_32x128_b4x16_p64_cg2(ptr addrspace(6) %addr, i64 %sdesc) { +; CHECK-LABEL: test_tcgen05_cp_32x128_b4x16_p64_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_cp_32x128_b4x16_p64_cg2_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_tcgen05_cp_32x128_b4x16_p64_cg2_param_1]; +; CHECK-NEXT: tcgen05.cp.cta_group::2.32x128b.warpx4.b8x16.b4x16_p64 [%r1], %rd1; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.cp.32x128b_warpx4.b4x16_p64.cg2(ptr addrspace(6) %addr, i64 %sdesc) ret void diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll b/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll index 8ca6a2a0..bf2adac 100644 --- a/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll +++ b/llvm/test/CodeGen/NVPTX/tcgen05-shift.ll @@ -7,18 +7,29 @@ declare void @llvm.nvvm.tcgen05.shift.down.cg1(ptr addrspace(6) %tmem_addr) declare void @llvm.nvvm.tcgen05.shift.down.cg2(ptr addrspace(6) %tmem_addr) -; CHECK-LABEL: test_tcgen05_shift -define void @test_tcgen05_shift(ptr addrspace(6) %tmem_addr) { -; CHECK-LABEL: test_tcgen05_shift( +define void @test_tcgen05_shift_cg1(ptr addrspace(6) %tmem_addr) { +; CHECK-LABEL: test_tcgen05_shift_cg1( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_shift_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_shift_cg1_param_0]; ; CHECK-NEXT: tcgen05.shift.cta_group::1.down [%r1]; -; CHECK-NEXT: tcgen05.shift.cta_group::2.down [%r1]; ; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.shift.down.cg1(ptr addrspace(6) %tmem_addr) + + ret void +} + +define void @test_tcgen05_shift_cg2(ptr addrspace(6) %tmem_addr) { +; CHECK-LABEL: test_tcgen05_shift_cg2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test_tcgen05_shift_cg2_param_0]; +; CHECK-NEXT: tcgen05.shift.cta_group::2.down [%r1]; +; CHECK-NEXT: ret; call void @llvm.nvvm.tcgen05.shift.down.cg2(ptr addrspace(6) %tmem_addr) ret void diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir index 74249c1..e2d3bff 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir @@ -17,7 +17,7 @@ body: | ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV32IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8)) + ; RV32IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s8)) ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]] ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10 %0:gpr(p0) = COPY $x10 @@ -42,7 +42,7 @@ body: | ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV32IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16)) + ; RV32IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s16)) ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]] ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10 %0:gpr(p0) = COPY $x10 @@ -67,7 +67,7 @@ body: | ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32)) + ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32)) ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]] ; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10 %0:gpr(p0) = COPY $x10 @@ -92,7 +92,7 @@ body: | ; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32)) + ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32)) ; RV32IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_W]], 1 ; RV32IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 ; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]] diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir index a2f7e30..ab537ea 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir @@ -17,7 +17,7 @@ body: | ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV64IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8)) + ; RV64IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s8)) ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]] ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10 %0:gpr(p0) = COPY $x10 @@ -42,7 +42,7 @@ body: | ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV64IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16)) + ; RV64IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s16)) ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]] ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10 %0:gpr(p0) = COPY $x10 @@ -67,7 +67,7 @@ body: | ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV64IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32)) + ; RV64IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32)) ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]] ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10 %0:gpr(p0) = COPY $x10 @@ -92,7 +92,7 @@ body: | ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64)) + ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s64)) ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]] ; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10 %0:gpr(p0) = COPY $x10 @@ -116,7 +116,7 @@ body: | ; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0 ; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 - ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64)) + ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s64)) ; RV64IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_D_RV64_]], 1 ; RV64IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 ; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]] diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir index f7fdc33..e547972 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir @@ -15,7 +15,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[COPY1]] :: (load store monotonic (s8)) + ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY1]], [[COPY]] :: (load store monotonic (s8)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -38,7 +38,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[COPY1]] :: (load store monotonic (s16)) + ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY1]], [[COPY]] :: (load store monotonic (s16)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_H]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -61,7 +61,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[COPY1]] :: (load store monotonic (s32)) + ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY1]], [[COPY]] :: (load store monotonic (s32)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_W]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -86,7 +86,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8)) + ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -113,7 +113,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[SUB]] :: (load store monotonic (s16)) + ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[SUB]], [[COPY]] :: (load store monotonic (s16)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_H]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -140,7 +140,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8)) + ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir index 178586c..f34826c 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir @@ -15,7 +15,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[COPY1]] :: (load store monotonic (s8)) + ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY1]], [[COPY]] :: (load store monotonic (s8)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -38,7 +38,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[COPY1]] :: (load store monotonic (s16)) + ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY1]], [[COPY]] :: (load store monotonic (s16)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_H]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -61,7 +61,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[COPY1]] :: (load store monotonic (s32)) + ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY1]], [[COPY]] :: (load store monotonic (s32)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_W]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -84,7 +84,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; CHECK-NEXT: [[AMOADD_D:%[0-9]+]]:gpr = AMOADD_D [[COPY]], [[COPY1]] :: (load store monotonic (s64)) + ; CHECK-NEXT: [[AMOADD_D:%[0-9]+]]:gpr = AMOADD_D [[COPY1]], [[COPY]] :: (load store monotonic (s64)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_D]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -109,7 +109,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8)) + ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -136,7 +136,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[SUB]] :: (load store monotonic (s16)) + ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[SUB]], [[COPY]] :: (load store monotonic (s16)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_H]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -163,7 +163,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[SUB]] :: (load store monotonic (s32)) + ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[SUB]], [[COPY]] :: (load store monotonic (s32)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_W]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 @@ -190,7 +190,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]] - ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8)) + ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8)) ; CHECK-NEXT: $x10 = COPY [[AMOADD_B]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(p0) = COPY $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret.ll index 4b1359e..73b0d3a 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vec-ret.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfbfmin,+zvfh -global-isel -stop-after=irtranslator \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfbfmin,+zvfhmin -global-isel -stop-after=irtranslator \ ; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32 %s -; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfbfmin,+zvfh -global-isel -stop-after=irtranslator \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfbfmin,+zvfhmin -global-isel -stop-after=irtranslator \ ; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64 %s ; ========================================================================== diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir index 1361d92..2e500d5 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir @@ -72,12 +72,12 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # -# DEBUG-NEXT: G_ABDS (opcode 65): 1 type index, 0 imm indices +# DEBUG-NEXT: G_ABDS (opcode [[G_ABDS:[0-9]+]]): 1 type index, 0 imm indices # DEBUG-NEXT:.. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT:.. imm index coverage check SKIPPED: user-defined predicate detected # -# DEBUG-NEXT:G_ABDU (opcode 66): 1 type index, 0 imm indices -# DEBUG-NEXT:.. opcode 66 is aliased to 65 +# DEBUG-NEXT:G_ABDU (opcode [[G_ABDU:[0-9]+]]): 1 type index, 0 imm indices +# DEBUG-NEXT:.. opcode [[G_ABDU]] is aliased to [[G_ABDS]] # DEBUG-NEXT:.. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT:.. imm index coverage check SKIPPED: user-defined predicate detected # diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll index 911692e..f960bc1 100644 --- a/llvm/test/CodeGen/RISCV/double-arith.ll +++ b/llvm/test/CodeGen/RISCV/double-arith.ll @@ -305,9 +305,6 @@ define i32 @fneg_d(double %a, double %b) nounwind { } define double @fsgnjn_d(double %a, double %b) nounwind { -; TODO: fsgnjn.s isn't selected on RV64 because DAGCombiner::visitBITCAST will -; convert (bitconvert (fneg x)) to a xor. -; ; CHECKIFD-LABEL: fsgnjn_d: ; CHECKIFD: # %bb.0: ; CHECKIFD-NEXT: fsgnjn.d fa0, fa0, fa1 diff --git a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll index 380a4a0..d1f1c46 100644 --- a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll @@ -5,7 +5,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; CHECK-LABEL: atomicrmw_uinc_wrap_i8: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: and %o0, -4, %o2 ; CHECK-NEXT: mov 3, %o3 ; CHECK-NEXT: andn %o3, %o0, %o0 @@ -36,7 +36,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end ; CHECK-NEXT: srl %o4, %o0, %o0 -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: nop %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst @@ -47,7 +47,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; CHECK-LABEL: atomicrmw_uinc_wrap_i16: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: and %o0, -4, %o2 ; CHECK-NEXT: and %o0, 3, %o0 ; CHECK-NEXT: xor %o0, 2, %o0 @@ -79,7 +79,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end ; CHECK-NEXT: srl %o5, %o0, %o0 -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: nop %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst @@ -90,7 +90,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; CHECK-LABEL: atomicrmw_uinc_wrap_i32: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: ld [%o0], %o2 ; CHECK-NEXT: .LBB2_1: ! %atomicrmw.start ; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1 @@ -106,7 +106,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; CHECK-NEXT: bne %icc, .LBB2_1 ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: mov %o2, %o0 %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst @@ -160,7 +160,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; CHECK-LABEL: atomicrmw_udec_wrap_i8: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: and %o0, -4, %o2 ; CHECK-NEXT: mov 3, %o3 ; CHECK-NEXT: andn %o3, %o0, %o0 @@ -193,7 +193,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end ; CHECK-NEXT: srl %o5, %o0, %o0 -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: nop %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst @@ -204,7 +204,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; CHECK-LABEL: atomicrmw_udec_wrap_i16: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: and %o0, -4, %o2 ; CHECK-NEXT: and %o0, 3, %o0 ; CHECK-NEXT: xor %o0, 2, %o0 @@ -238,7 +238,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end ; CHECK-NEXT: srl %g2, %o0, %o0 -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: nop %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst @@ -249,7 +249,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { ; CHECK-LABEL: atomicrmw_udec_wrap_i32: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadStore | #StoreStore ; CHECK-NEXT: ld [%o0], %o2 ; CHECK-NEXT: .LBB6_1: ! %atomicrmw.start ; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1 @@ -267,7 +267,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { ; CHECK-NEXT: bne %icc, .LBB6_1 ; CHECK-NEXT: nop ; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end -; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; CHECK-NEXT: membar #LoadLoad | #LoadStore ; CHECK-NEXT: retl ; CHECK-NEXT: mov %o2, %o0 %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst diff --git a/llvm/test/CodeGen/SPARC/atomics-ordering.ll b/llvm/test/CodeGen/SPARC/atomics-ordering.ll new file mode 100644 index 0000000..7c13ac2 --- /dev/null +++ b/llvm/test/CodeGen/SPARC/atomics-ordering.ll @@ -0,0 +1,446 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32 +; RUN: llc < %s -mtriple=sparc -mcpu=leon4 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32-LEON4 +; RUN: llc < %s -mtriple=sparc -mcpu=v9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32-V9 +; RUN: llc < %s -mtriple=sparcv9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC64 + +define i32 @load_acq(ptr %0) nounwind { +; SPARC32-LABEL: load_acq: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_load_4 +; SPARC32-NEXT: mov 2, %o1 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: load_acq: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: ld [%o0], %o0 +; +; SPARC32-V9-LABEL: load_acq: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: ld [%o0], %o0 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: nop +; +; SPARC64-LABEL: load_acq: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: ld [%o0], %o0 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %2 = load atomic i32, ptr %0 acquire, align 4 + ret i32 %2 +} + +define i32 @load_sc(ptr %0) nounwind { +; SPARC32-LABEL: load_sc: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_load_4 +; SPARC32-NEXT: mov 5, %o1 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: load_sc: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: ld [%o0], %o0 +; +; SPARC32-V9-LABEL: load_sc: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: ld [%o0], %o0 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: nop +; +; SPARC64-LABEL: load_sc: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: ld [%o0], %o0 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + %2 = load atomic i32, ptr %0 seq_cst, align 4 + ret i32 %2 +} + +define void @store_rel(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: store_rel: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_store_4 +; SPARC32-NEXT: mov 3, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: store_rel: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: st %o1, [%o0] +; +; SPARC32-V9-LABEL: store_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: st %o1, [%o0] +; +; SPARC64-LABEL: store_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: st %o1, [%o0] + store atomic i32 %1, ptr %0 release, align 4 + ret void +} + +define void @store_sc(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: store_sc: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_store_4 +; SPARC32-NEXT: mov 5, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: store_sc: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: st %o1, [%o0] +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: ldstub [%sp+-1], %g0 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: nop +; +; SPARC32-V9-LABEL: store_sc: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: st %o1, [%o0] +; SPARC32-V9-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: nop +; +; SPARC64-LABEL: store_sc: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: st %o1, [%o0] +; SPARC64-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: nop + store atomic i32 %1, ptr %0 seq_cst, align 4 + ret void +} + +define i32 @rmw_acq(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: rmw_acq: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_exchange_4 +; SPARC32-NEXT: mov 2, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: rmw_acq: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: swap [%o0], %o1 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o1, %o0 +; +; SPARC32-V9-LABEL: rmw_acq: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: swap [%o0], %o1 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o1, %o0 +; +; SPARC64-LABEL: rmw_acq: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: swap [%o0], %o1 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o1, %o0 + %3 = atomicrmw xchg ptr %0, i32 %1 acquire, align 4 + ret i32 %3 +} + +define i32 @rmw_rel(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: rmw_rel: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_exchange_4 +; SPARC32-NEXT: mov 3, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: rmw_rel: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: swap [%o0], %o1 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o1, %o0 +; +; SPARC32-V9-LABEL: rmw_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: swap [%o0], %o1 +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o1, %o0 +; +; SPARC64-LABEL: rmw_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: swap [%o0], %o1 +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o1, %o0 + %3 = atomicrmw xchg ptr %0, i32 %1 release, align 4 + ret i32 %3 +} + +define i32 @rmw_acq_rel(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: rmw_acq_rel: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_exchange_4 +; SPARC32-NEXT: mov 4, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: rmw_acq_rel: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: swap [%o0], %o1 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o1, %o0 +; +; SPARC32-V9-LABEL: rmw_acq_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: swap [%o0], %o1 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o1, %o0 +; +; SPARC64-LABEL: rmw_acq_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: swap [%o0], %o1 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o1, %o0 + %3 = atomicrmw xchg ptr %0, i32 %1 acq_rel, align 4 + ret i32 %3 +} + +define i32 @rmw_sc(ptr %0, i32 %1) nounwind { +; SPARC32-LABEL: rmw_sc: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i1, %o1 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: call __atomic_exchange_4 +; SPARC32-NEXT: mov 5, %o2 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore %g0, %o0, %o0 +; +; SPARC32-LEON4-LABEL: rmw_sc: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: swap [%o0], %o1 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o1, %o0 +; +; SPARC32-V9-LABEL: rmw_sc: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: swap [%o0], %o1 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o1, %o0 +; +; SPARC64-LABEL: rmw_sc: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: swap [%o0], %o1 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o1, %o0 + %3 = atomicrmw xchg ptr %0, i32 %1 seq_cst, align 4 + ret i32 %3 +} + +define i32 @cas_acq(ptr %0, i32 %1, i32 %2) nounwind { +; SPARC32-LABEL: cas_acq: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i2, %o2 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: st %i1, [%fp+-4] +; SPARC32-NEXT: add %fp, -4, %o1 +; SPARC32-NEXT: mov 2, %o3 +; SPARC32-NEXT: call __atomic_compare_exchange_4 +; SPARC32-NEXT: mov %o3, %o4 +; SPARC32-NEXT: ld [%fp+-4], %i0 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: cas_acq: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o2, %o0 +; +; SPARC32-V9-LABEL: cas_acq: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: cas [%o0], %o1, %o2 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o2, %o0 +; +; SPARC64-LABEL: cas_acq: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: cas [%o0], %o1, %o2 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o2, %o0 + %4 = cmpxchg ptr %0, i32 %1, i32 %2 acquire acquire, align 4 + %5 = extractvalue { i32, i1 } %4, 0 + ret i32 %5 +} + +define i32 @cas_rel(ptr %0, i32 %1, i32 %2) nounwind { +; SPARC32-LABEL: cas_rel: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i2, %o2 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: st %i1, [%fp+-4] +; SPARC32-NEXT: add %fp, -4, %o1 +; SPARC32-NEXT: mov 3, %o3 +; SPARC32-NEXT: call __atomic_compare_exchange_4 +; SPARC32-NEXT: mov %g0, %o4 +; SPARC32-NEXT: ld [%fp+-4], %i0 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: cas_rel: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o2, %o0 +; +; SPARC32-V9-LABEL: cas_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: cas [%o0], %o1, %o2 +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o2, %o0 +; +; SPARC64-LABEL: cas_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: cas [%o0], %o1, %o2 +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o2, %o0 + %4 = cmpxchg ptr %0, i32 %1, i32 %2 release monotonic, align 4 + %5 = extractvalue { i32, i1 } %4, 0 + ret i32 %5 +} + +define i32 @cas_acq_rel(ptr %0, i32 %1, i32 %2) nounwind { +; SPARC32-LABEL: cas_acq_rel: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i2, %o2 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: st %i1, [%fp+-4] +; SPARC32-NEXT: add %fp, -4, %o1 +; SPARC32-NEXT: mov 4, %o3 +; SPARC32-NEXT: call __atomic_compare_exchange_4 +; SPARC32-NEXT: mov 2, %o4 +; SPARC32-NEXT: ld [%fp+-4], %i0 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: cas_acq_rel: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o2, %o0 +; +; SPARC32-V9-LABEL: cas_acq_rel: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: cas [%o0], %o1, %o2 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o2, %o0 +; +; SPARC64-LABEL: cas_acq_rel: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: cas [%o0], %o1, %o2 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o2, %o0 + %4 = cmpxchg ptr %0, i32 %1, i32 %2 acq_rel acquire, align 4 + %5 = extractvalue { i32, i1 } %4, 0 + ret i32 %5 +} + +define i32 @cas_sc(ptr %0, i32 %1, i32 %2) nounwind { +; SPARC32-LABEL: cas_sc: +; SPARC32: ! %bb.0: +; SPARC32-NEXT: save %sp, -96, %sp +; SPARC32-NEXT: mov %i2, %o2 +; SPARC32-NEXT: mov %i0, %o0 +; SPARC32-NEXT: st %i1, [%fp+-4] +; SPARC32-NEXT: add %fp, -4, %o1 +; SPARC32-NEXT: mov 5, %o3 +; SPARC32-NEXT: call __atomic_compare_exchange_4 +; SPARC32-NEXT: mov %o3, %o4 +; SPARC32-NEXT: ld [%fp+-4], %i0 +; SPARC32-NEXT: ret +; SPARC32-NEXT: restore +; +; SPARC32-LEON4-LABEL: cas_sc: +; SPARC32-LEON4: ! %bb.0: +; SPARC32-LEON4-NEXT: stbar +; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2 +; SPARC32-LEON4-NEXT: retl +; SPARC32-LEON4-NEXT: mov %o2, %o0 +; +; SPARC32-V9-LABEL: cas_sc: +; SPARC32-V9: ! %bb.0: +; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore +; SPARC32-V9-NEXT: cas [%o0], %o1, %o2 +; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore +; SPARC32-V9-NEXT: retl +; SPARC32-V9-NEXT: mov %o2, %o0 +; +; SPARC64-LABEL: cas_sc: +; SPARC64: ! %bb.0: +; SPARC64-NEXT: membar #LoadStore | #StoreStore +; SPARC64-NEXT: cas [%o0], %o1, %o2 +; SPARC64-NEXT: membar #LoadLoad | #LoadStore +; SPARC64-NEXT: retl +; SPARC64-NEXT: mov %o2, %o0 + %4 = cmpxchg ptr %0, i32 %1, i32 %2 seq_cst seq_cst, align 4 + %5 = extractvalue { i32, i1 } %4, 0 + ret i32 %5 +} diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_predicated_io/predicated_io_generic.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_predicated_io/predicated_io_generic.ll new file mode 100644 index 0000000..a3127e8 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_predicated_io/predicated_io_generic.ll @@ -0,0 +1,36 @@ +; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_predicated_io %s -o - | FileCheck %s + +; CHECK-ERROR: LLVM ERROR: OpPredicated[Load/Store]INTEL +; CHECK-ERROR-SAME: instructions require the following SPIR-V extension: SPV_INTEL_predicated_io + +; CHECK-DAG: Capability PredicatedIOINTEL +; CHECK-DAG: Extension "SPV_INTEL_predicated_io" + +; CHECK-DAG: %[[Int32Ty:[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: %[[IntPtrTy:[0-9]+]] = OpTypePointer CrossWorkgroup %[[Int32Ty]] +; CHECK-DAG: %[[BoolTy:[0-9]+]] = OpTypeBool +; CHECK-DAG: %[[VoidTy:[0-9]+]] = OpTypeVoid +; CHECK: %[[LoadPtr:[0-9]+]] = OpFunctionParameter %[[IntPtrTy]] +; CHECK: %[[StorePtr:[0-9]+]] = OpFunctionParameter %[[IntPtrTy]] +; CHECK: %[[DefaultVal:[0-9]+]] = OpFunctionParameter %[[Int32Ty]] +; CHECK: %[[StoreObj:[0-9]+]] = OpFunctionParameter %[[Int32Ty]] +; CHECK: %[[Predicate:[0-9]+]] = OpFunctionParameter %[[BoolTy]] +; CHECK: PredicatedLoadINTEL %[[Int32Ty]] %[[LoadPtr]] %[[Predicate]] %[[DefaultVal]] +; CHECK: PredicatedLoadINTEL %[[Int32Ty]] %[[LoadPtr]] %[[Predicate]] %[[DefaultVal]] None +; CHECK: PredicatedStoreINTEL %[[StorePtr]] %[[StoreObj]] %[[Predicate]] +; CHECK: PredicatedStoreINTEL %[[StorePtr]] %[[StoreObj]] %[[Predicate]] None + +define spir_func void @foo(ptr addrspace(1) %load_pointer, ptr addrspace(1) %store_pointer, i32 %default_value, i32 %store_object, i1 zeroext %predicate) { +entry: + %1 = call spir_func i32 @_Z27__spirv_PredicatedLoadINTELPU3AS1Kibi(ptr addrspace(1) %load_pointer, i1 %predicate, i32 %default_value) + %2 = call spir_func i32 @_Z27__spirv_PredicatedLoadINTELPU3AS1Kibii(ptr addrspace(1) %load_pointer, i1 %predicate, i32 %default_value, i32 0) + call spir_func void @_Z28__spirv_PredicatedStoreINTELPU3AS1Kiib(ptr addrspace(1) %store_pointer, i32 %store_object, i1 %predicate) + call spir_func void @_Z28__spirv_PredicatedStoreINTELPU3AS1Kiibi(ptr addrspace(1) %store_pointer, i32 %store_object, i1 %predicate, i32 0) + ret void +} + +declare spir_func i32 @_Z27__spirv_PredicatedLoadINTELPU3AS1Kibi(ptr addrspace(1), i1, i32) +declare spir_func i32 @_Z27__spirv_PredicatedLoadINTELPU3AS1Kibii(ptr addrspace(1), i1, i32, i32) +declare spir_func void @_Z28__spirv_PredicatedStoreINTELPU3AS1Kiib(ptr addrspace(1), i32, i1) +declare spir_func void @_Z28__spirv_PredicatedStoreINTELPU3AS1Kiibi(ptr addrspace(1), i32, i1, i32) diff --git a/llvm/test/CodeGen/SPIRV/llc-pipeline.ll b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll new file mode 100644 index 0000000..3fff2a8 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll @@ -0,0 +1,214 @@ +; UNSUPPORTED:expensive_checks +; RUN:llc -O0 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \ +; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-O0 %s +; RUN:llc -O1 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \ +; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s +; RUN:llc -O2 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \ +; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s +; RUN:llc -O3 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \ +; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s +; +; REQUIRES:asserts + +; SPIRV-O0:Target Library Information +; SPIRV-O0-NEXT:Target Pass Configuration +; SPIRV-O0-NEXT:Machine Module Information +; SPIRV-O0-NEXT:Target Transform Information +; SPIRV-O0-NEXT:Create Garbage Collector Module Metadata +; SPIRV-O0-NEXT:Assumption Cache Tracker +; SPIRV-O0-NEXT:Profile summary info +; SPIRV-O0-NEXT:Machine Branch Probability Analysis +; SPIRV-O0-NEXT: ModulePass Manager +; SPIRV-O0-NEXT: Pre-ISel Intrinsic Lowering +; SPIRV-O0-NEXT: FunctionPass Manager +; SPIRV-O0-NEXT: Expand large div/rem +; SPIRV-O0-NEXT: Expand fp +; SPIRV-O0-NEXT: Lower Garbage Collection Instructions +; SPIRV-O0-NEXT: Shadow Stack GC Lowering +; SPIRV-O0-NEXT: Remove unreachable blocks from the CFG +; SPIRV-O0-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining) +; SPIRV-O0-NEXT: Scalarize Masked Memory Intrinsics +; SPIRV-O0-NEXT: Expand reduction intrinsics +; SPIRV-O0-NEXT: SPIR-V Regularizer +; SPIRV-O0-NEXT: SPIRV prepare functions +; SPIRV-O0-NEXT: FunctionPass Manager +; SPIRV-O0-NEXT: Lower invoke and unwind, for unwindless code generators +; SPIRV-O0-NEXT: Remove unreachable blocks from the CFG +; SPIRV-O0-NEXT: SPIRV strip convergent intrinsics +; SPIRV-O0-NEXT: SPIRV Legalize Implicit Binding +; SPIRV-O0-NEXT: SPIRV CBuffer Access +; SPIRV-O0-NEXT: SPIRV emit intrinsics +; SPIRV-O0-NEXT: FunctionPass Manager +; SPIRV-O0-NEXT: SPIRV legalize bitcast pass +; SPIRV-O0-NEXT: Prepare callbr +; SPIRV-O0-NEXT: Safe Stack instrumentation pass +; SPIRV-O0-NEXT: Insert stack protectors +; SPIRV-O0-NEXT: Analysis containing CSE Info +; SPIRV-O0-NEXT: IRTranslator +; SPIRV-O0-NEXT: Analysis for ComputingKnownBits +; SPIRV-O0-NEXT: MachineDominator Tree Construction +; SPIRV-O0-NEXT: SPIRVPreLegalizerCombiner +; SPIRV-O0-NEXT: SPIRV pre legalizer +; SPIRV-O0-NEXT: Analysis containing CSE Info +; SPIRV-O0-NEXT: Legalizer +; SPIRV-O0-NEXT: SPIRV post legalizer +; SPIRV-O0-NEXT: Analysis for ComputingKnownBits +; SPIRV-O0-NEXT: Dominator Tree Construction +; SPIRV-O0-NEXT: Natural Loop Information +; SPIRV-O0-NEXT: Lazy Branch Probability Analysis +; SPIRV-O0-NEXT: Lazy Block Frequency Analysis +; SPIRV-O0-NEXT: InstructionSelect +; SPIRV-O0-NEXT: ResetMachineFunction +; SPIRV-O0-NEXT: Finalize ISel and expand pseudo-instructions +; SPIRV-O0-NEXT: Local Stack Slot Allocation +; SPIRV-O0-NEXT: Remove Redundant DEBUG_VALUE analysis +; SPIRV-O0-NEXT: Fixup Statepoint Caller Saved +; SPIRV-O0-NEXT: Lazy Machine Block Frequency Analysis +; SPIRV-O0-NEXT: Machine Optimization Remark Emitter +; SPIRV-O0-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; SPIRV-O0-NEXT: Post-RA pseudo instruction expansion pass +; SPIRV-O0-NEXT: Analyze Machine Code For Garbage Collection +; SPIRV-O0-NEXT: Insert fentry calls +; SPIRV-O0-NEXT: Insert XRay ops +; SPIRV-O0-NEXT: Machine Sanitizer Binary Metadata +; SPIRV-O0-NEXT: Lazy Machine Block Frequency Analysis +; SPIRV-O0-NEXT: Machine Optimization Remark Emitter +; SPIRV-O0-NEXT: Stack Frame Layout Analysis +; SPIRV-O0-NEXT: SPIRV module analysis +; SPIRV-O0-NEXT: FunctionPass Manager +; SPIRV-O0-NEXT: Lazy Machine Block Frequency Analysis +; SPIRV-O0-NEXT: Machine Optimization Remark Emitter +; SPIRV-O0-NEXT: SPIRV Assembly Printer +; SPIRV-O0-NEXT: Free MachineFunction + +; SPIRV-Opt:Target Library Information +; SPIRV-Opt-NEXT:Target Pass Configuration +; SPIRV-Opt-NEXT:Machine Module Information +; SPIRV-Opt-NEXT:Target Transform Information +; SPIRV-Opt-NEXT:Assumption Cache Tracker +; SPIRV-Opt-NEXT:Type-Based Alias Analysis +; SPIRV-Opt-NEXT:Scoped NoAlias Alias Analysis +; SPIRV-Opt-NEXT:Profile summary info +; SPIRV-Opt-NEXT:Create Garbage Collector Module Metadata +; SPIRV-Opt-NEXT:Machine Branch Probability Analysis +; SPIRV-Opt-NEXT: ModulePass Manager +; SPIRV-Opt-NEXT: Pre-ISel Intrinsic Lowering +; SPIRV-Opt-NEXT: FunctionPass Manager +; SPIRV-Opt-NEXT: Expand large div/rem +; SPIRV-Opt-NEXT: Expand fp +; SPIRV-Opt-NEXT: Dominator Tree Construction +; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl) +; SPIRV-Opt-NEXT: Natural Loop Information +; SPIRV-Opt-NEXT: Canonicalize natural loops +; SPIRV-Opt-NEXT: Scalar Evolution Analysis +; SPIRV-Opt-NEXT: Loop Pass Manager +; SPIRV-Opt-NEXT: Canonicalize Freeze Instructions in Loops +; SPIRV-Opt-NEXT: Induction Variable Users +; SPIRV-Opt-NEXT: Loop Strength Reduction +; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl) +; SPIRV-Opt-NEXT: Function Alias Analysis Results +; SPIRV-Opt-NEXT: Merge contiguous icmps into a memcmp +; SPIRV-Opt-NEXT: Natural Loop Information +; SPIRV-Opt-NEXT: Lazy Branch Probability Analysis +; SPIRV-Opt-NEXT: Lazy Block Frequency Analysis +; SPIRV-Opt-NEXT: Expand memcmp() to load/stores +; SPIRV-Opt-NEXT: Lower Garbage Collection Instructions +; SPIRV-Opt-NEXT: Shadow Stack GC Lowering +; SPIRV-Opt-NEXT: Remove unreachable blocks from the CFG +; SPIRV-Opt-NEXT: Natural Loop Information +; SPIRV-Opt-NEXT: Post-Dominator Tree Construction +; SPIRV-Opt-NEXT: Branch Probability Analysis +; SPIRV-Opt-NEXT: Block Frequency Analysis +; SPIRV-Opt-NEXT: Constant Hoisting +; SPIRV-Opt-NEXT: Replace intrinsics with calls to vector library +; SPIRV-Opt-NEXT: Lazy Branch Probability Analysis +; SPIRV-Opt-NEXT: Lazy Block Frequency Analysis +; SPIRV-Opt-NEXT: Optimization Remark Emitter +; SPIRV-Opt-NEXT: Partially inline calls to library functions +; SPIRV-Opt-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining) +; SPIRV-Opt-NEXT: Scalarize Masked Memory Intrinsics +; SPIRV-Opt-NEXT: Expand reduction intrinsics +; SPIRV-Opt-NEXT: SPIR-V Regularizer +; SPIRV-Opt-NEXT: SPIRV prepare functions +; SPIRV-Opt-NEXT: FunctionPass Manager +; SPIRV-Opt-NEXT: Dominator Tree Construction +; SPIRV-Opt-NEXT: Natural Loop Information +; SPIRV-Opt-NEXT: CodeGen Prepare +; SPIRV-Opt-NEXT: Lower invoke and unwind, for unwindless code generators +; SPIRV-Opt-NEXT: Remove unreachable blocks from the CFG +; SPIRV-Opt-NEXT: SPIRV strip convergent intrinsics +; SPIRV-Opt-NEXT: SPIRV Legalize Implicit Binding +; SPIRV-Opt-NEXT: SPIRV CBuffer Access +; SPIRV-Opt-NEXT: SPIRV emit intrinsics +; SPIRV-Opt-NEXT: FunctionPass Manager +; SPIRV-Opt-NEXT: SPIRV legalize bitcast pass +; SPIRV-Opt-NEXT: Dominator Tree Construction +; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl) +; SPIRV-Opt-NEXT: Function Alias Analysis Results +; SPIRV-Opt-NEXT: ObjC ARC contraction +; SPIRV-Opt-NEXT: Prepare callbr +; SPIRV-Opt-NEXT: Safe Stack instrumentation pass +; SPIRV-Opt-NEXT: Insert stack protectors +; SPIRV-Opt-NEXT: Analysis containing CSE Info +; SPIRV-Opt-NEXT: Natural Loop Information +; SPIRV-Opt-NEXT: Post-Dominator Tree Construction +; SPIRV-Opt-NEXT: Branch Probability Analysis +; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl) +; SPIRV-Opt-NEXT: Function Alias Analysis Results +; SPIRV-Opt-NEXT: IRTranslator +; SPIRV-Opt-NEXT: Analysis for ComputingKnownBits +; SPIRV-Opt-NEXT: MachineDominator Tree Construction +; SPIRV-Opt-NEXT: SPIRVPreLegalizerCombiner +; SPIRV-Opt-NEXT: SPIRV pre legalizer +; SPIRV-Opt-NEXT: Analysis containing CSE Info +; SPIRV-Opt-NEXT: Legalizer +; SPIRV-Opt-NEXT: SPIRV post legalizer +; SPIRV-Opt-NEXT: Analysis for ComputingKnownBits +; SPIRV-Opt-NEXT: Lazy Branch Probability Analysis +; SPIRV-Opt-NEXT: Lazy Block Frequency Analysis +; SPIRV-Opt-NEXT: InstructionSelect +; SPIRV-Opt-NEXT: ResetMachineFunction +; SPIRV-Opt-NEXT: Finalize ISel and expand pseudo-instructions +; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis +; SPIRV-Opt-NEXT: Early Tail Duplication +; SPIRV-Opt-NEXT: Optimize machine instruction PHIs +; SPIRV-Opt-NEXT: Slot index numbering +; SPIRV-Opt-NEXT: Merge disjoint stack slots +; SPIRV-Opt-NEXT: Local Stack Slot Allocation +; SPIRV-Opt-NEXT: Remove dead machine instructions +; SPIRV-Opt-NEXT: MachineDominator Tree Construction +; SPIRV-Opt-NEXT: Machine Natural Loop Construction +; SPIRV-Opt-NEXT: Machine Block Frequency Analysis +; SPIRV-Opt-NEXT: Early Machine Loop Invariant Code Motion +; SPIRV-Opt-NEXT: MachineDominator Tree Construction +; SPIRV-Opt-NEXT: Machine Block Frequency Analysis +; SPIRV-Opt-NEXT: Machine Common Subexpression Elimination +; SPIRV-Opt-NEXT: MachinePostDominator Tree Construction +; SPIRV-Opt-NEXT: Machine Cycle Info Analysis +; SPIRV-Opt-NEXT: Machine code sinking +; SPIRV-Opt-NEXT: Peephole Optimizations +; SPIRV-Opt-NEXT: Remove dead machine instructions +; SPIRV-Opt-NEXT: Remove Redundant DEBUG_VALUE analysis +; SPIRV-Opt-NEXT: Fixup Statepoint Caller Saved +; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis +; SPIRV-Opt-NEXT: Machine Optimization Remark Emitter +; SPIRV-Opt-NEXT: Prologue/Epilogue Insertion & Frame Finalization +; SPIRV-Opt-NEXT: Tail Duplication +; SPIRV-Opt-NEXT: Post-RA pseudo instruction expansion pass +; SPIRV-Opt-NEXT: Analyze Machine Code For Garbage Collection +; SPIRV-Opt-NEXT: Insert fentry calls +; SPIRV-Opt-NEXT: Insert XRay ops +; SPIRV-Opt-NEXT: Machine Sanitizer Binary Metadata +; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis +; SPIRV-Opt-NEXT: Machine Optimization Remark Emitter +; SPIRV-Opt-NEXT: Stack Frame Layout Analysis +; SPIRV-Opt-NEXT: SPIRV module analysis +; SPIRV-Opt-NEXT: FunctionPass Manager +; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis +; SPIRV-Opt-NEXT: Machine Optimization Remark Emitter +; SPIRV-Opt-NEXT: SPIRV Assembly Printer +; SPIRV-Opt-NEXT: Free MachineFunction + +define void @empty() { + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll index 52f57dc..a8d37be 100644 --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -434,7 +434,6 @@ entry: define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-LABEL: stest_f16i16: ; CHECK: .functype stest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: call __truncsfhf2 @@ -474,15 +473,6 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767 -; CHECK-NEXT: local.tee 8 -; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768 -; CHECK-NEXT: local.tee 9 -; CHECK-NEXT: i32x4.max_s -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 -; CHECK-NEXT: local.tee 10 -; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat @@ -495,13 +485,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-NEXT: local.get 7 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 8 -; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: local.get 9 -; CHECK-NEXT: i32x4.max_s -; CHECK-NEXT: local.get 10 -; CHECK-NEXT: v128.and -; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: i16x8.narrow_i32x4_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi <8 x half> %x to <8 x i32> @@ -516,7 +500,6 @@ entry: define <8 x i16> @utest_f16i16(<8 x half> %x) { ; CHECK-LABEL: utest_f16i16: ; CHECK: .functype utest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: call __truncsfhf2 @@ -556,9 +539,6 @@ define <8 x i16> @utest_f16i16(<8 x half> %x) { ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 -; CHECK-NEXT: local.tee 8 -; CHECK-NEXT: i32x4.min_u ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat @@ -571,8 +551,6 @@ define <8 x i16> @utest_f16i16(<8 x half> %x) { ; CHECK-NEXT: local.get 7 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 8 -; CHECK-NEXT: i32x4.min_u ; CHECK-NEXT: i16x8.narrow_i32x4_u ; CHECK-NEXT: # fallthrough-return entry: @@ -1861,7 +1839,6 @@ entry: define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-LABEL: stest_f16i16_mm: ; CHECK: .functype stest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: call __truncsfhf2 @@ -1901,15 +1878,6 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767 -; CHECK-NEXT: local.tee 8 -; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768 -; CHECK-NEXT: local.tee 9 -; CHECK-NEXT: i32x4.max_s -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 -; CHECK-NEXT: local.tee 10 -; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat @@ -1922,13 +1890,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: local.get 7 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 8 -; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: local.get 9 -; CHECK-NEXT: i32x4.max_s -; CHECK-NEXT: local.get 10 -; CHECK-NEXT: v128.and -; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: i16x8.narrow_i32x4_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi <8 x half> %x to <8 x i32> @@ -1941,7 +1903,6 @@ entry: define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { ; CHECK-LABEL: utest_f16i16_mm: ; CHECK: .functype utest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 5 ; CHECK-NEXT: call __truncsfhf2 @@ -1981,9 +1942,6 @@ define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 -; CHECK-NEXT: local.tee 8 -; CHECK-NEXT: i32x4.min_u ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat @@ -1996,8 +1954,6 @@ define <8 x i16> @utest_f16i16_mm(<8 x half> %x) { ; CHECK-NEXT: local.get 7 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: local.get 8 -; CHECK-NEXT: i32x4.min_u ; CHECK-NEXT: i16x8.narrow_i32x4_u ; CHECK-NEXT: # fallthrough-return entry: diff --git a/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll b/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll new file mode 100644 index 0000000..f3f3ba9 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 + +; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) #2 +declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) #2 + +define <16 x i8> @i16_signed(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: i16_signed: +; CHECK: .functype i16_signed (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: # %bb2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i8x16.narrow_i16x8_s +; CHECK-NEXT: # fallthrough-return +bb2: + %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %0, <16 x i16> splat (i16 -128)) + %2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127)) + %3 = trunc nsw <16 x i16> %2 to <16 x i8> + ret <16 x i8> %3 + ret <16 x i8> %3 +} + +define <8 x i16> @i32_signed(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: i32_signed: +; CHECK: .functype i32_signed (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: # %bb2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i16x8.narrow_i32x4_s +; CHECK-NEXT: # fallthrough-return +bb2: + %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %1 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> %0, <8 x i32> splat (i32 -32768)) + %2 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %1, <8 x i32> splat (i32 32767)) + %3 = trunc nsw <8 x i32> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define <8 x i16> @i32_signed_flipped(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: i32_signed_flipped: +; CHECK: .functype i32_signed_flipped (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: # %bb2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i16x8.narrow_i32x4_s +; CHECK-NEXT: # fallthrough-return +bb2: + %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %1 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> splat (i32 32767), <8 x i32> %0) + %2 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> splat (i32 -32768), <8 x i32> %1) + %3 = trunc nsw <8 x i32> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define <16 x i8> @i16_unsigned(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: i16_unsigned: +; CHECK: .functype i16_unsigned (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: # %bb2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i8x16.narrow_i16x8_u +; CHECK-NEXT: # fallthrough-return +bb2: + %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %1 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %0, <16 x i16> splat (i16 255)) + %2 = trunc nuw <16 x i16> %1 to <16 x i8> + ret <16 x i8> %2 +} + +define <8 x i16> @i32_unsigned(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: i32_unsigned: +; CHECK: .functype i32_unsigned (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: # %bb2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i16x8.narrow_i32x4_u +; CHECK-NEXT: # fallthrough-return +bb2: + %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %1 = tail call <8 x i32> @llvm.umin.v8i32(<8 x i32> %0, <8 x i32> splat (i32 65535)) + %2 = trunc nsw <8 x i32> %1 to <8 x i16> + ret <8 x i16> %2 +} diff --git a/llvm/test/CodeGen/X86/and-mask-variable.ll b/llvm/test/CodeGen/X86/and-mask-variable.ll new file mode 100644 index 0000000..d89f0db --- /dev/null +++ b/llvm/test/CodeGen/X86/and-mask-variable.ll @@ -0,0 +1,212 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86-NOBMI +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86-BMI2 +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86-BMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64-BMI2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64-BMI2 + +define i32 @mask_pair(i32 %x, i32 %y) nounwind { +; X86-NOBMI-LABEL: mask_pair: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: shrl %cl, %eax +; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: retl +; +; X86-BMI2-LABEL: mask_pair: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax +; X86-BMI2-NEXT: retl +; +; X64-NOBMI-LABEL: mask_pair: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: movl %edi, %eax +; X64-NOBMI-NEXT: shrl %cl, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: shll %cl, %eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI2-LABEL: mask_pair: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax +; X64-BMI2-NEXT: retq + %shl = shl nsw i32 -1, %y + %and = and i32 %shl, %x + ret i32 %and +} + +define i64 @mask_pair_64(i64 %x, i64 %y) nounwind { +; X86-NOBMI-LABEL: mask_pair_64: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl $-1, %edx +; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: testb $32, %cl +; X86-NOBMI-NEXT: je .LBB1_2 +; X86-NOBMI-NEXT: # %bb.1: +; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: xorl %eax, %eax +; X86-NOBMI-NEXT: .LBB1_2: +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: retl +; +; X86-BMI2-LABEL: mask_pair_64: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl $-1, %edx +; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax +; X86-BMI2-NEXT: testb $32, %cl +; X86-BMI2-NEXT: je .LBB1_2 +; X86-BMI2-NEXT: # %bb.1: +; X86-BMI2-NEXT: movl %eax, %edx +; X86-BMI2-NEXT: xorl %eax, %eax +; X86-BMI2-NEXT: .LBB1_2: +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: retl +; +; X64-NOBMI-LABEL: mask_pair_64: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rsi, %rcx +; X64-NOBMI-NEXT: movq %rdi, %rax +; X64-NOBMI-NEXT: shrq %cl, %rax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI-NEXT: shlq %cl, %rax +; X64-NOBMI-NEXT: retq +; +; X64-BMI2-LABEL: mask_pair_64: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax +; X64-BMI2-NEXT: retq + %shl = shl nsw i64 -1, %y + %and = and i64 %shl, %x + ret i64 %and +} + +define i128 @mask_pair_128(i128 %x, i128 %y) nounwind { +; X86-NOBMI-LABEL: mask_pair_128: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %ebx +; X86-NOBMI-NEXT: pushl %edi +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: subl $32, %esp +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl $-1, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $-1, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $-1, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $-1, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NOBMI-NEXT: movl $0, (%esp) +; X86-NOBMI-NEXT: movl %ecx, %edx +; X86-NOBMI-NEXT: shrb $3, %dl +; X86-NOBMI-NEXT: andb $12, %dl +; X86-NOBMI-NEXT: negb %dl +; X86-NOBMI-NEXT: movsbl %dl, %ebx +; X86-NOBMI-NEXT: movl 24(%esp,%ebx), %edx +; X86-NOBMI-NEXT: movl 28(%esp,%ebx), %esi +; X86-NOBMI-NEXT: shldl %cl, %edx, %esi +; X86-NOBMI-NEXT: movl 16(%esp,%ebx), %edi +; X86-NOBMI-NEXT: movl 20(%esp,%ebx), %ebx +; X86-NOBMI-NEXT: shldl %cl, %ebx, %edx +; X86-NOBMI-NEXT: shldl %cl, %edi, %ebx +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI-NEXT: shll %cl, %edi +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %ebx +; X86-NOBMI-NEXT: movl %esi, 12(%eax) +; X86-NOBMI-NEXT: movl %edx, 8(%eax) +; X86-NOBMI-NEXT: movl %ebx, 4(%eax) +; X86-NOBMI-NEXT: movl %edi, (%eax) +; X86-NOBMI-NEXT: addl $32, %esp +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: popl %edi +; X86-NOBMI-NEXT: popl %ebx +; X86-NOBMI-NEXT: retl $4 +; +; X86-BMI2-LABEL: mask_pair_128: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: pushl %ebx +; X86-BMI2-NEXT: pushl %edi +; X86-BMI2-NEXT: pushl %esi +; X86-BMI2-NEXT: subl $32, %esp +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl $-1, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $-1, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $-1, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $-1, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movl $0, (%esp) +; X86-BMI2-NEXT: movl %ecx, %edx +; X86-BMI2-NEXT: shrb $3, %dl +; X86-BMI2-NEXT: andb $12, %dl +; X86-BMI2-NEXT: negb %dl +; X86-BMI2-NEXT: movsbl %dl, %edi +; X86-BMI2-NEXT: movl 24(%esp,%edi), %edx +; X86-BMI2-NEXT: movl 28(%esp,%edi), %esi +; X86-BMI2-NEXT: shldl %cl, %edx, %esi +; X86-BMI2-NEXT: movl 16(%esp,%edi), %ebx +; X86-BMI2-NEXT: movl 20(%esp,%edi), %edi +; X86-BMI2-NEXT: shldl %cl, %edi, %edx +; X86-BMI2-NEXT: shldl %cl, %ebx, %edi +; X86-BMI2-NEXT: shlxl %ecx, %ebx, %ecx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edi +; X86-BMI2-NEXT: movl %esi, 12(%eax) +; X86-BMI2-NEXT: movl %edx, 8(%eax) +; X86-BMI2-NEXT: movl %edi, 4(%eax) +; X86-BMI2-NEXT: movl %ecx, (%eax) +; X86-BMI2-NEXT: addl $32, %esp +; X86-BMI2-NEXT: popl %esi +; X86-BMI2-NEXT: popl %edi +; X86-BMI2-NEXT: popl %ebx +; X86-BMI2-NEXT: retl $4 +; +; X64-NOBMI-LABEL: mask_pair_128: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdx, %rcx +; X64-NOBMI-NEXT: movq $-1, %rdx +; X64-NOBMI-NEXT: movq $-1, %r8 +; X64-NOBMI-NEXT: shlq %cl, %r8 +; X64-NOBMI-NEXT: xorl %eax, %eax +; X64-NOBMI-NEXT: testb $64, %cl +; X64-NOBMI-NEXT: cmovneq %r8, %rdx +; X64-NOBMI-NEXT: cmoveq %r8, %rax +; X64-NOBMI-NEXT: andq %rdi, %rax +; X64-NOBMI-NEXT: andq %rsi, %rdx +; X64-NOBMI-NEXT: retq +; +; X64-BMI2-LABEL: mask_pair_128: +; X64-BMI2: # %bb.0: +; X64-BMI2-NEXT: movq $-1, %rcx +; X64-BMI2-NEXT: shlxq %rdx, %rcx, %r8 +; X64-BMI2-NEXT: xorl %eax, %eax +; X64-BMI2-NEXT: testb $64, %dl +; X64-BMI2-NEXT: cmovneq %r8, %rcx +; X64-BMI2-NEXT: cmoveq %r8, %rax +; X64-BMI2-NEXT: andq %rdi, %rax +; X64-BMI2-NEXT: andq %rsi, %rcx +; X64-BMI2-NEXT: movq %rcx, %rdx +; X64-BMI2-NEXT: retq + %shl = shl nsw i128 -1, %y + %and = and i128 %shl, %x + ret i128 %and +} diff --git a/llvm/test/CodeGen/X86/ptrtoaddr-fast-isel.ll b/llvm/test/CodeGen/X86/ptrtoaddr-fast-isel.ll new file mode 100644 index 0000000..c302d41 --- /dev/null +++ b/llvm/test/CodeGen/X86/ptrtoaddr-fast-isel.ll @@ -0,0 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-linux-gnu -fast-isel -fast-isel-abort=1 < %s -o - | FileCheck %s + +define i64 @ptrtoaddr(ptr %p) { +; CHECK-LABEL: ptrtoaddr: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: retq + %addr = ptrtoaddr ptr %p to i64 + ret i64 %addr +} diff --git a/llvm/test/DebugInfo/X86/instr-ref-opt-bisect2.ll b/llvm/test/DebugInfo/X86/instr-ref-opt-bisect2.ll new file mode 100644 index 0000000..92aedfe --- /dev/null +++ b/llvm/test/DebugInfo/X86/instr-ref-opt-bisect2.ll @@ -0,0 +1,36 @@ +; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=1 | FileCheck %s +; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=10 | FileCheck %s +; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=100 | FileCheck %s + +; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=1 -fast-isel=true | FileCheck %s +; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=10 -fast-isel=true | FileCheck %s +; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=100 -fast-isel=true | FileCheck %s + +; This test has the same purpose as the instr-ref-opt-bisect.ll, to check if +; during opt-bisect's optimisation level change we won't run into an assert. +; This is simply testing different IR. + +; CHECK: DBG_VALUE + +target triple = "x86_64-pc-windows-msvc" + +define i1 @foo(i32 %arg) !dbg !3 { +entry: + #dbg_value(i32 %arg, !4, !DIExpression(), !5) + switch i32 %arg, label %bb [ + i32 810, label %bb + ], !dbg !5 +bb: + %a = load volatile i1, ptr null, align 1 + ret i1 false +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1) +!1 = !DIFile(filename: "instr-ref-opt-bisect2.ll", directory: ".") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = distinct !DISubprogram(name: "instr-ref-opt-bisect2", file: !1, unit: !0) +!4 = !DILocalVariable(name: "arg", arg: 2, scope: !3) +!5 = !DILocation(line: 0, scope: !3) diff --git a/llvm/test/Instrumentation/AddressSanitizer/alloca-offset-lifetime.ll b/llvm/test/Instrumentation/AddressSanitizer/alloca-offset-lifetime.ll deleted file mode 100644 index a4846176..0000000 --- a/llvm/test/Instrumentation/AddressSanitizer/alloca-offset-lifetime.ll +++ /dev/null @@ -1,27 +0,0 @@ -; Test that ASAN will not instrument lifetime markers on alloca offsets. -; -; RUN: opt < %s -passes=asan --asan-use-after-scope -S | FileCheck %s - -target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.15.0" - -%t = type { ptr, ptr, %sub, i64 } -%sub = type { i32 } - -define void @foo() sanitize_address { -entry: - %0 = alloca %t, align 8 - %x = getelementptr inbounds %t, ptr %0, i64 0, i32 2 - call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x) - call void @bar(ptr nonnull %x) - call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x) #3 - ret void -} - -declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) -declare void @bar(ptr) -declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) - -; CHECK: store i64 %[[STACK_BASE:.+]], ptr %asan_local_stack_base, align 8 -; CHECK-NOT: store i8 0 -; CHECK: call void @bar(ptr nonnull %x) diff --git a/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll b/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll index 5f08552..0e382b2 100644 --- a/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll +++ b/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll @@ -38,7 +38,7 @@ entry: ret ptr %ptr1 } -!0 = !{!"int"} +!0 = !{!"int", i1 0} ;. -; CHECK: [[META0]] = !{!"int"} +; CHECK: [[META0]] = !{!"int", i1 false} ;. diff --git a/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll b/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll index e023ab6b..19673da 100644 --- a/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll +++ b/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll @@ -79,7 +79,7 @@ entry: ret ptr %ptr1 } -!0 = !{!"int"} +!0 = !{!"int", i1 0} ;. -; CHECK: [[META0]] = !{!"int"} +; CHECK: [[META0]] = !{!"int", i1 false} ;. diff --git a/llvm/test/Instrumentation/AllocToken/remark.ll b/llvm/test/Instrumentation/AllocToken/remark.ll index a2404526..f2eaa62 100644 --- a/llvm/test/Instrumentation/AllocToken/remark.ll +++ b/llvm/test/Instrumentation/AllocToken/remark.ll @@ -32,7 +32,7 @@ entry: ret ptr %ptr1 } -!0 = !{!"int"} +!0 = !{!"int", i1 0} ;. -; CHECK: [[META0]] = !{!"int"} +; CHECK: [[META0]] = !{!"int", i1 false} ;. diff --git a/llvm/test/Instrumentation/AllocToken/typehashpointersplit.ll b/llvm/test/Instrumentation/AllocToken/typehashpointersplit.ll new file mode 100644 index 0000000..1f77648 --- /dev/null +++ b/llvm/test/Instrumentation/AllocToken/typehashpointersplit.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=inferattrs,alloc-token -alloc-token-mode=typehashpointersplit -alloc-token-max=2 -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +declare ptr @malloc(i64) + +define void @test_typehashpointersplit() sanitize_alloc_token { +; CHECK-LABEL: define void @test_typehashpointersplit( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__alloc_token_malloc(i64 4, i64 0), !alloc_token [[META0:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = call ptr @__alloc_token_malloc(i64 128, i64 0), !alloc_token [[META1:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = call ptr @__alloc_token_malloc(i64 8, i64 1), !alloc_token [[META2:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = call ptr @__alloc_token_malloc(i64 64, i64 1), !alloc_token [[META3:![0-9]+]] +; CHECK-NEXT: ret void +; +entry: + call ptr @malloc(i64 4), !alloc_token !0 + call ptr @malloc(i64 128), !alloc_token !1 + call ptr @malloc(i64 8), !alloc_token !2 + call ptr @malloc(i64 64), !alloc_token !3 + ret void +} + +!0 = !{!"int", i1 0} +!1 = !{!"Foo", i1 0} +!2 = !{!"int*", i1 1} +!3 = !{!"Foo", i1 1} +;. +; CHECK: [[META0]] = !{!"int", i1 false} +; CHECK: [[META1]] = !{!"Foo", i1 false} +; CHECK: [[META2]] = !{!"int*", i1 true} +; CHECK: [[META3]] = !{!"Foo", i1 true} +;. diff --git a/llvm/test/Instrumentation/SanitizerCoverage/missing_dbg.ll b/llvm/test/Instrumentation/SanitizerCoverage/missing_dbg.ll index 3568434..07b9a1c 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/missing_dbg.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/missing_dbg.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=2 -S | FileCheck %s +; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 -sanitizer-coverage-stack-depth -sanitizer-coverage-stack-depth-callback-min=1 -S | FileCheck %s --check-prefix=CHECK-STACK-CALLBACK +; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 -sanitizer-coverage-stack-depth -S | FileCheck %s --check-prefix=CHECK-STACK-DEPTH target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -55,6 +57,86 @@ entry: ret i32 %t } +define i32 @with_dbg_stack_callback(ptr %a) !dbg !8 { +; CHECK-STACK-CALLBACK-LABEL: define i32 @with_dbg_stack_callback( +; CHECK-STACK-CALLBACK-SAME: ptr [[A:%.*]]) !dbg [[DBG8:![0-9]+]] { +; CHECK-STACK-CALLBACK-NEXT: entry: +; CHECK-STACK-CALLBACK-NEXT: [[BUF:%.*]] = alloca [64 x i8], align 1 +; CHECK-STACK-CALLBACK-NEXT: call void @__sanitizer_cov_stack_depth() #[[ATTR1:[0-9]+]], !dbg [[DBG9:![0-9]+]] +; CHECK-STACK-CALLBACK-NEXT: %t = load i32, ptr [[A]], align 4 +; CHECK-STACK-CALLBACK-NEXT: call void @external_func() +; CHECK-STACK-CALLBACK-NEXT: ret i32 %t +; +entry: + %buf = alloca [64 x i8], align 1 + %t = load i32, ptr %a, align 4 + call void @external_func() + ret i32 %t +} + +define i32 @with_dbg_stack_depth(ptr %a) !dbg !10 { +; CHECK-STACK-DEPTH-LABEL: define i32 @with_dbg_stack_depth( +; CHECK-STACK-DEPTH-SAME: ptr [[A:%.*]]) !dbg [[DBG10:![0-9]+]] { +; CHECK-STACK-DEPTH-NEXT: entry: +; CHECK-STACK-DEPTH-NEXT: [[BUF:%.*]] = alloca [64 x i8], align 1 +; CHECK-STACK-DEPTH-NEXT: [[TMP1:%.*]] = call ptr @llvm.frameaddress.p0(i32 0) +; CHECK-STACK-DEPTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 +; CHECK-STACK-DEPTH-NEXT: [[TMP3:%.*]] = load i64, ptr @__sancov_lowest_stack, align 8 +; CHECK-STACK-DEPTH-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP2]], [[TMP3]] +; CHECK-STACK-DEPTH-NEXT: br i1 [[TMP4]], label {{%.*}}, label {{%.*}} +; CHECK-STACK-DEPTH: store i64 [[TMP2]], ptr @__sancov_lowest_stack, align 8, !dbg [[DBG11:![0-9]+]], {{.*}}!nosanitize +; CHECK-STACK-DEPTH: %t = load i32, ptr [[A]], align 4 +; CHECK-STACK-DEPTH-NEXT: call void @external_func() +; CHECK-STACK-DEPTH-NEXT: ret i32 %t +; +entry: + %buf = alloca [64 x i8], align 1 + %t = load i32, ptr %a, align 4 + call void @external_func() + ret i32 %t +} + +define i32 @without_dbg_stack_callback(ptr %a) { +; CHECK-STACK-CALLBACK-LABEL: define i32 @without_dbg_stack_callback( +; CHECK-STACK-CALLBACK-SAME: ptr [[A:%.*]]) { +; CHECK-STACK-CALLBACK-NEXT: entry: +; CHECK-STACK-CALLBACK-NEXT: [[BUF:%.*]] = alloca [64 x i8], align 1 +; CHECK-STACK-CALLBACK-NEXT: call void @__sanitizer_cov_stack_depth() #[[ATTR1]] +; CHECK-STACK-CALLBACK-NEXT: %t = load i32, ptr [[A]], align 4 +; CHECK-STACK-CALLBACK-NEXT: call void @external_func() +; CHECK-STACK-CALLBACK-NEXT: ret i32 %t +; +entry: + %buf = alloca [64 x i8], align 1 + %t = load i32, ptr %a, align 4 + call void @external_func() + ret i32 %t +} + +define i32 @without_dbg_stack_depth(ptr %a) { +; CHECK-STACK-DEPTH-LABEL: define i32 @without_dbg_stack_depth( +; CHECK-STACK-DEPTH-SAME: ptr [[A:%.*]]) { +; CHECK-STACK-DEPTH-NEXT: entry: +; CHECK-STACK-DEPTH-NEXT: [[BUF:%.*]] = alloca [64 x i8], align 1 +; CHECK-STACK-DEPTH-NEXT: [[TMP1:%.*]] = call ptr @llvm.frameaddress.p0(i32 0) +; CHECK-STACK-DEPTH-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 +; CHECK-STACK-DEPTH-NEXT: [[TMP3:%.*]] = load i64, ptr @__sancov_lowest_stack, align 8 +; CHECK-STACK-DEPTH-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP2]], [[TMP3]] +; CHECK-STACK-DEPTH-NEXT: br i1 [[TMP4]], label {{%.*}}, label {{%.*}} +; CHECK-STACK-DEPTH: store i64 [[TMP2]], ptr @__sancov_lowest_stack, align 8, {{.*}}!nosanitize +; CHECK-STACK-DEPTH: %t = load i32, ptr [[A]], align 4 +; CHECK-STACK-DEPTH-NEXT: call void @external_func() +; CHECK-STACK-DEPTH-NEXT: ret i32 %t +; +entry: + %buf = alloca [64 x i8], align 1 + %t = load i32, ptr %a, align 4 + call void @external_func() + ret i32 %t +} + +declare void @external_func() + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2} @@ -66,6 +148,10 @@ entry: !5 = !{} !6 = !DILocation(line: 192, scope: !3) !7 = !DILocation(line: 0, scope: !3) +!8 = distinct !DISubprogram(name: "with_dbg_stack_callback", scope: !1, file: !1, line: 200, type: !4, scopeLine: 200, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!9 = !DILocation(line: 200, scope: !8) +!10 = distinct !DISubprogram(name: "with_dbg_stack_depth", scope: !1, file: !1, line: 210, type: !4, scopeLine: 210, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!11 = !DILocation(line: 210, scope: !10) ;. ; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C89, file: [[META1:![0-9]+]], isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, nameTableKind: None) @@ -76,3 +162,9 @@ entry: ; CHECK: [[DBG6]] = !DILocation(line: 192, scope: [[DBG3]]) ; CHECK: [[DBG7]] = !DILocation(line: 0, scope: [[DBG3]]) ;. +; CHECK-STACK-CALLBACK: [[DBG8]] = distinct !DISubprogram(name: "with_dbg_stack_callback", scope: {{.*}}, file: {{.*}}, line: 200 +; CHECK-STACK-CALLBACK: [[DBG9]] = !DILocation(line: 200, scope: [[DBG8]]) +;. +; CHECK-STACK-DEPTH: [[DBG10]] = distinct !DISubprogram(name: "with_dbg_stack_depth", scope: {{.*}}, file: {{.*}}, line: 210 +; CHECK-STACK-DEPTH: [[DBG11]] = !DILocation(line: 210, scope: [[DBG10]]) +;. diff --git a/llvm/test/MC/AArch64/armv9a-sysp-diagnostics.s b/llvm/test/MC/AArch64/armv9a-sysp-diagnostics.s new file mode 100644 index 0000000..f8baf37 --- /dev/null +++ b/llvm/test/MC/AArch64/armv9a-sysp-diagnostics.s @@ -0,0 +1,95 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ERROR + +tlbip ALLE1 +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE1IS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE1ISNXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE1NXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE1OS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE1OSNXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE2 +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE2IS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE2ISNXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE2NXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE2OS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE2OSNXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE3 +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE3IS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE3ISNXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE3NXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE3OS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ALLE3OSNXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ASIDE1 +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ASIDE1IS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ASIDE1ISNXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ASIDE1NXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ASIDE1OS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip ASIDE1OSNXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip PAALL +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip PAALLOS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip RPALOS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip RPAOS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLE1 +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLE1IS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLE1ISNXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLE1NXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLE1OS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLE1OSNXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLS12E1 +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLS12E1IS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLS12E1ISNXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLS12E1NXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLS12E1OS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLS12E1OSNXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLWS2E1 +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLWS2E1IS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLWS2E1ISNXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLWS2E1NXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLWS2E1OS +// CHECK-ERROR: error: invalid operand for TLBIP instruction +tlbip VMALLWS2E1OSNXS +// CHECK-ERROR: error: invalid operand for TLBIP instruction diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s b/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s index a21f762..73653d0 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_salu_lit64.s @@ -1,64 +1,71 @@ -// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefix=GFX1250 %s +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX1250,GFX1250-ASM %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250,GFX1250-DIS %s s_mov_b64 s[2:3], 0x10abcdef12345678 -// GFX1250: s_mov_b64 s[2:3], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_mov_b64 s[2:3], 0x10abcdef12345678 ; encoding: [0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_add_nc_u64 s[2:3], s[4:5], 0x10abcdef12345678 -// GFX1250: s_add_nc_u64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_add_nc_u64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_mul_u64 s[2:3], 0x10abcdef12345678, s[4:5] -// GFX1250: s_mul_u64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_mul_u64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_and_b64 s[2:3], 0x10abcdef12345678, s[4:5] -// GFX1250: s_and_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_and_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_or_b64 s[2:3], s[4:5], 0x10abcdef12345678 -// GFX1250: s_or_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_or_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_xor_b64 s[2:3], 0x10abcdef12345678, s[4:5] -// GFX1250: s_xor_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_xor_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_and_not1_b64 s[2:3], 0x10abcdef12345678, 0x10abcdef12345678 -// GFX1250: s_and_not1_b64 s[2:3], lit64(0x10abcdef12345678), lit64(0x10abcdef12345678) ; encoding: [0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_and_not1_b64 s[2:3], 0x10abcdef12345678, 0x10abcdef12345678 ; encoding: [0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_or_not1_b64 s[2:3], s[4:5], 0x10abcdef12345678 -// GFX1250: s_or_not1_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_or_not1_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_andn2_b64 s[2:3], 0x10abcdef12345678, s[4:5] -// GFX1250: s_and_not1_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_and_not1_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_orn2_b64 s[2:3], s[4:5], 0x10abcdef12345678 -// GFX1250: s_or_not1_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_or_not1_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_nand_b64 s[2:3], s[4:5], 0x10abcdef12345678 -// GFX1250: s_nand_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_nand_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_nor_b64 s[2:3], s[4:5], 0x10abcdef12345678 -// GFX1250: s_nor_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_nor_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_xnor_b64 s[2:3], s[4:5], 0x10abcdef12345678 -// GFX1250: s_xnor_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_xnor_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_lshl_b64 s[2:3], 0x10abcdef12345678, s4 -// GFX1250: s_lshl_b64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_lshl_b64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_lshr_b64 s[2:3], 0x10abcdef12345678, s4 -// GFX1250: s_lshr_b64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_lshr_b64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_ashr_i64 s[2:3], 0x10abcdef12345678, s4 -// GFX1250: s_ashr_i64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_ashr_i64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_bfe_u64 s[2:3], 0x10abcdef12345678, 5 -// GFX1250: s_bfe_u64 s[2:3], lit64(0x10abcdef12345678), 5 ; encoding: [0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_bfe_u64 s[2:3], 0x10abcdef12345678, 5 ; encoding: [0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_bfe_i64 s[2:3], 0x80abcdef12345678, 5 -// GFX1250: s_bfe_i64 s[2:3], lit64(0x80abcdef12345678), 5 ; encoding: [0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80] +// GFX1250: s_bfe_i64 s[2:3], 0x80abcdef12345678, 5 ; encoding: [0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80] s_cselect_b64 s[2:3], s[4:5], 0x10abcdef12345678 -// GFX1250: s_cselect_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: s_cselect_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] s_mov_b64 s[2:3], 0xffffffff01234567 -// GFX1250: s_mov_b64 s[2:3], lit64(0xffffffff01234567) ; encoding: [0xfe,0x01,0x82,0xbe,0x67,0x45,0x23,0x01,0xff,0xff,0xff,0xff] +// GFX1250: s_mov_b64 s[2:3], 0xffffffff01234567 ; encoding: [0xfe,0x01,0x82,0xbe,0x67,0x45,0x23,0x01,0xff,0xff,0xff,0xff] +// TODO: disasm s_mov_b64 s[2:3], lit64(0x777) -// GFX1250: s_mov_b64 s[2:3], 0x777 ; encoding: [0xff,0x01,0x82,0xbe,0x77,0x07,0x00,0x00] +// GFX1250-ASM: s_mov_b64 s[2:3], lit64(0x777) ; encoding: [0xfe,0x01,0x82,0xbe,0x77,0x07,0x00,0x00,0x00,0x00,0x00,0x00] +// GFX1250-DIS: s_mov_b64 s[2:3], 0x777 ; encoding: [0xff,0x01,0x82,0xbe,0x77,0x07,0x00,0x00] + +s_mov_b64 s[2:3], 0x777 +// GFX1250: s_mov_b64 s[2:3], 0x777 ; encoding: [0xff,0x01,0x82,0xbe,0x77,0x07,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s index 5cf484f..cc351af 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sop1.s @@ -1,61 +1,63 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefix=GFX1250 %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s s_add_pc_i64 s[2:3] // GFX1250: s_add_pc_i64 s[2:3] ; encoding: [0x02,0x4b,0x80,0xbe] -// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU s_add_pc_i64 4 // GFX1250: s_add_pc_i64 4 ; encoding: [0x84,0x4b,0x80,0xbe] -// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU s_add_pc_i64 100 // GFX1250: s_add_pc_i64 0x64 ; encoding: [0xff,0x4b,0x80,0xbe,0x64,0x00,0x00,0x00] -// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU s_add_pc_i64 0x12345678abcd0 -// GFX1250: s_add_pc_i64 lit64(0x12345678abcd0) ; encoding: [0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00] -// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX1250: s_add_pc_i64 0x12345678abcd0 ; encoding: [0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU s_get_pc_i64 s[2:3] // GFX1250: s_get_pc_i64 s[2:3] ; encoding: [0x00,0x47,0x82,0xbe] -// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU s_getpc_b64 s[2:3] // GFX1250: s_get_pc_i64 s[2:3] ; encoding: [0x00,0x47,0x82,0xbe] s_set_pc_i64 s[2:3] // GFX1250: s_set_pc_i64 s[2:3] ; encoding: [0x02,0x48,0x80,0xbe] -// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU s_setpc_b64 s[2:3] // GFX1250: s_set_pc_i64 s[2:3] ; encoding: [0x02,0x48,0x80,0xbe] s_swap_pc_i64 s[2:3], 10 // GFX1250: s_swap_pc_i64 s[2:3], 10 ; encoding: [0x8a,0x49,0x82,0xbe] -// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU s_swappc_b64 s[2:3], 10 // GFX1250: s_swap_pc_i64 s[2:3], 10 ; encoding: [0x8a,0x49,0x82,0xbe] s_rfe_i64 s[2:3] // GFX1250: s_rfe_i64 s[2:3] ; encoding: [0x02,0x4a,0x80,0xbe] -// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU s_rfe_b64 s[2:3] // GFX1250: s_rfe_i64 s[2:3] ; encoding: [0x02,0x4a,0x80,0xbe] s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_GET_CLUSTER_BARRIER_STATE) // GFX1250: s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_GET_CLUSTER_BARRIER_STATE) ; encoding: [0x88,0x4c,0x82,0xbe] -// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: specified message id is not supported on this GPU +// GFX12-ERR: :[[@LINE-2]]:31: error: specified message id is not supported on this GPU s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_CLUSTER_BARRIER_STATE) // GFX1250: s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_CLUSTER_BARRIER_STATE) ; encoding: [0x88,0x4d,0x82,0xbe] -// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: specified message id is not supported on this GPU +// GFX12-ERR: :[[@LINE-2]]:35: error: specified message id is not supported on this GPU s_get_shader_cycles_u64 s[2:3] // GFX1250: s_get_shader_cycles_u64 s[2:3] ; encoding: [0x00,0x06,0x82,0xbe] -// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU s_barrier_signal -3 // GFX1250: s_barrier_signal -3 ; encoding: [0xc3,0x4e,0x80,0xbe] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_valu_lit64.s b/llvm/test/MC/AMDGPU/gfx1250_asm_valu_lit64.s index 7395a51..58da119 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_valu_lit64.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_valu_lit64.s @@ -1,211 +1,213 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefix=GFX1250 %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s v_ceil_f64 v[254:255], 0x10abcdef12345678 -// GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_ceil_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cvt_f32_f64 v255, 0x10abcdef12345678 -// GFX1250: v_cvt_f32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cvt_f32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cvt_i32_f64 v255, 0x10abcdef12345678 -// GFX1250: v_cvt_i32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cvt_i32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cvt_u32_f64 v255, 0x10abcdef12345678 -// GFX1250: v_cvt_u32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cvt_u32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_floor_f64 v[254:255], 0x10abcdef12345678 -// GFX1250: v_floor_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_floor_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_fract_f64 v[254:255], 0x10abcdef12345678 -// GFX1250: v_fract_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_fract_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_frexp_exp_i32_f64 v255, 0x10abcdef12345678 -// GFX1250: v_frexp_exp_i32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_frexp_exp_i32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_frexp_mant_f64 v[254:255], 0x10abcdef12345678 -// GFX1250: v_frexp_mant_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_frexp_mant_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_rcp_f64 v[254:255], 0x10abcdef12345678 -// GFX1250: v_rcp_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_rcp_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_rndne_f64 v[254:255], 0x10abcdef12345678 -// GFX1250: v_rndne_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_rndne_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_rsq_f64 v[254:255], 0x10abcdef12345678 -// GFX1250: v_rsq_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_rsq_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_sqrt_f64 v[254:255], 0x10abcdef12345678 -// GFX1250: v_sqrt_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_sqrt_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_trunc_f64 v[254:255], 0x10abcdef12345678 -// GFX1250: v_trunc_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_trunc_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_add_f64 v[254:255], 0x10abcdef12345678, v[254:255] -// GFX1250: v_add_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_add_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_max_num_f64 v[254:255], 0x10abcdef12345678, v[254:255] -// GFX1250: v_max_num_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_max_num_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_min_num_f64 v[254:255], 0x10abcdef12345678, v[254:255] -// GFX1250: v_min_num_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_min_num_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_mul_f64 v[254:255], 0x10abcdef12345678, v[254:255] -// GFX1250: v_mul_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_mul_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_class_f64 vcc_lo, 0x10abcdef12345678, v255 -// GFX1250: v_cmp_class_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v255 ; encoding: [0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_class_f64_e32 vcc_lo, 0x10abcdef12345678, v255 ; encoding: [0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_eq_f64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_eq_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_eq_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_ge_f64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_ge_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_ge_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_gt_f64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_gt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_gt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_gt_i64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_gt_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_gt_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_gt_u64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_gt_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_gt_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_le_f64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_le_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_le_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_le_i64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_le_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_le_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_le_u64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_le_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_le_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_lg_f64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_lg_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_lg_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_lt_f64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_lt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_lt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_lt_i64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_lt_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_lt_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_lt_u64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_lt_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_lt_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_ne_i64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_ne_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_ne_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_ne_u64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_ne_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_ne_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_neq_f64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_neq_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_neq_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_nge_f64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_nge_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_nge_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_ngt_f64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_ngt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_ngt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_nle_f64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_nle_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_nle_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_nlg_f64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_nlg_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_nlg_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_nlt_f64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_nlt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_nlt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_o_f64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_o_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_o_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmp_u_f64 vcc_lo, 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmp_u_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmp_u_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_class_f64 0x10abcdef12345678, v255 -// GFX1250: v_cmpx_class_f64_e32 lit64(0x10abcdef12345678), v255 ; encoding: [0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_class_f64_e32 0x10abcdef12345678, v255 ; encoding: [0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_eq_f64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_eq_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_eq_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_eq_i64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_eq_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_eq_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_eq_u64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_eq_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_eq_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_ge_f64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_ge_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_ge_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_ge_i64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_ge_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_ge_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_ge_u64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_ge_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_ge_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_gt_f64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_gt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_gt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_gt_i64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_gt_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_gt_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_gt_u64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_gt_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_gt_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_le_f64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_le_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_le_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_le_i64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_le_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_le_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_le_u64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_le_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_le_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_lg_f64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_lg_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_lg_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_lt_f64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_lt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_lt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_lt_i64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_lt_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_lt_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_lt_u64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_lt_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_lt_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_ne_i64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_ne_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_ne_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_ne_u64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_ne_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_ne_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_neq_f64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_neq_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_neq_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_nge_f64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_nge_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_nge_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_ngt_f64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_ngt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_ngt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_nle_f64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_nle_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_nle_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_nlg_f64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_nlg_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_nlg_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_nlt_f64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_nlt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_nlt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_o_f64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_o_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_o_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_cmpx_u_f64 0x10abcdef12345678, v[254:255] -// GFX1250: v_cmpx_u_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +// GFX1250: v_cmpx_u_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] v_ceil_f64 v[254:255], 153.1 -// GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x4063233333333333) ; encoding: [0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40] +// GFX1250: v_ceil_f64_e32 v[254:255], 0x4063233333333333 ; encoding: [0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40] v_ceil_f64 v[254:255], 1.5e22 -// GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x448969368974c05b) ; encoding: [0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44] +// GFX1250: v_ceil_f64_e32 v[254:255], 0x448969368974c05b ; encoding: [0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44] // These 64-bit literals can be represented as 32-bit with encoding 255. HW behavior: // 64 bit float: the lower 32-bit are padded with zero diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s index 2a761d9..16cec8b 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s global_load_b96 v[1:3], v[0:1], off -// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_b32 v5, v[2:3] scale_offset // GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s index 811c6eb..6950c72 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s @@ -26,7 +26,7 @@ v_mov_b64 v[4:5], 0.5 // GFX1250: v_mov_b64_e32 v[4:5], 0.5 ; encoding: [0xf0,0x3a,0x08,0x7e] v_mov_b64 v[254:255], 0xaf123456 -// GFX1250: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: v_mov_b64_e32 v[254:255], 0xaf123456 ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] v_tanh_f32 v5, v1 // GFX1250: v_tanh_f32_e32 v5, v1 ; encoding: [0x01,0x3d,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s index 40fcd6f..0d61c1f 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s @@ -1,5 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s v_mov_b64_e32 v[4:5], v[2:3] // GFX1250: v_mov_b64_e32 v[4:5], v[2:3] ; encoding: [0x02,0x3b,0x08,0x7e] @@ -26,7 +27,7 @@ v_mov_b64 v[4:5], 0.5 // GFX1250: v_mov_b64_e32 v[4:5], 0.5 ; encoding: [0xf0,0x3a,0x08,0x7e] v_mov_b64 v[254:255], 0xaf123456 -// GFX1250: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: v_mov_b64_e32 v[254:255], 0xaf123456 ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] v_tanh_f32 v5, v1 // GFX1250: v_tanh_f32_e32 v5, v1 ; encoding: [0x01,0x3d,0x0a,0x7e] @@ -628,8 +629,8 @@ v_cvt_f16_fp8 v1.l, 0x1234 v_cvt_f16_fp8 v1.h, v2 // GFX1250: v_cvt_f16_fp8_e32 v1.h, v2 ; encoding: [0x02,0xef,0x02,0x7f] -v_cvt_pk_f16_bf8 v1, v2 -// GFX1250: v_cvt_pk_f16_bf8 v1, v2 ; encoding: [0x02,0xed,0x02,0x7e] +v_cvt_pk_f16_bf8 v1, v2.l +// GFX1250: v_cvt_pk_f16_bf8 v1, v2.l ; encoding: [0x02,0xed,0x02,0x7e] v_cvt_pk_f16_bf8 v1, s2 // GFX1250: v_cvt_pk_f16_bf8 v1, s2 ; encoding: [0x02,0xec,0x02,0x7e] @@ -637,8 +638,8 @@ v_cvt_pk_f16_bf8 v1, s2 v_cvt_pk_f16_bf8 v1, 100 // GFX1250: v_cvt_pk_f16_bf8 v1, 0x64 ; encoding: [0xff,0xec,0x02,0x7e,0x64,0x00,0x00,0x00] -v_cvt_pk_f16_fp8 v1, v2 -// GFX1250: v_cvt_pk_f16_fp8 v1, v2 ; encoding: [0x02,0xeb,0x02,0x7e] +v_cvt_pk_f16_fp8 v1, v2.l +// GFX1250: v_cvt_pk_f16_fp8 v1, v2.l ; encoding: [0x02,0xeb,0x02,0x7e] v_cvt_pk_f16_fp8 v1, s2 // GFX1250: v_cvt_pk_f16_fp8 v1, s2 ; encoding: [0x02,0xea,0x02,0x7e] @@ -694,8 +695,8 @@ v_cvt_pk_f32_fp8_e32 v[2:3], s3 v_cvt_pk_f32_fp8_e32 v[2:3], 3 // GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xdc,0x04,0x7e] -v_cvt_pk_f32_fp8_e32 v[2:3], v3 -// GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xdd,0x04,0x7e] +v_cvt_pk_f32_fp8_e32 v[2:3], v3.l +// GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], v3.l ; encoding: [0x03,0xdd,0x04,0x7e] v_cvt_pk_f32_fp8_e32 v[4:5], v127.h // GFX1250: v_cvt_pk_f32_fp8_e32 v[4:5], v127.h ; encoding: [0xff,0xdd,0x08,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s index 0a1d3bf..02872b0 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2.s @@ -1,5 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX1250 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX1250,GFX1250-ASM %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250,GFX1250-DIS %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200-ERR --implicit-check-not=error: %s v_fmac_f64 v[4:5], v[2:3], v[4:5] @@ -195,7 +196,7 @@ v_add_nc_u64 v[4:5], -4.0, v[4:5] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_add_nc_u64 v[4:5], 0xaf123456, v[4:5] -// GFX1250: v_add_nc_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: v_add_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_add_nc_u64 v[4:5], 0x3f717273, v[4:5] @@ -315,7 +316,7 @@ v_sub_nc_u64 v[4:5], -4.0, v[4:5] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_sub_nc_u64 v[4:5], 0xaf123456, v[4:5] -// GFX1250: v_sub_nc_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: v_sub_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_sub_nc_u64 v[4:5], 0x3f717273, v[4:5] @@ -435,7 +436,7 @@ v_mul_u64 v[4:5], -4.0, v[4:5] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_mul_u64 v[4:5], 0xaf123456, v[4:5] -// GFX1250: v_mul_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: v_mul_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_mul_u64 v[4:5], 0x3f717273, v[4:5] @@ -519,7 +520,7 @@ v_fmamk_f64 v[6:7], v[254:255], 0x405ec00000000000, v[2:3] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmamk_f64 v[6:7], s[2:3], 0x405ec00012345678, v[2:3] -// GFX1250: v_fmamk_f64 v[6:7], s[2:3], lit64(0x405ec00012345678), v[2:3] ; encoding: [0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +// GFX1250: v_fmamk_f64 v[6:7], s[2:3], 0x405ec00012345678, v[2:3] ; encoding: [0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmamk_f64 v[6:7], vcc, 0x405ec000, v[2:3] @@ -551,7 +552,7 @@ v_fmamk_f64 v[254:255], 0x405ec000, 0x405ec000, v[254:255] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmamk_f64 v[254:255], 0x405ec00012345678, 0x405ec00012345678, v[254:255] -// GFX1250: v_fmamk_f64 v[254:255], lit64(0x405ec00012345678), lit64(0x405ec00012345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +// GFX1250: v_fmamk_f64 v[254:255], 0x405ec00012345678, 0x405ec00012345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmamk_f64 v[254:255], 123.0, 0x405ec000, v[2:3] @@ -559,15 +560,15 @@ v_fmamk_f64 v[254:255], 123.0, 0x405ec000, v[2:3] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmamk_f64 v[4:5], v[2:3], 123.1, v[6:7] -// GFX1250: v_fmamk_f64 v[4:5], v[2:3], lit64(0x405ec66666666666), v[6:7] ; encoding: [0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +// GFX1250: v_fmamk_f64 v[4:5], v[2:3], 0x405ec66666666666, v[6:7] ; encoding: [0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmamk_f64 v[4:5], 0x405ec66666666666, 123.1, v[6:7] -// GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +// GFX1250: v_fmamk_f64 v[4:5], 0x405ec66666666666, 0x405ec66666666666, v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmamk_f64 v[4:5], 123.1, 123.1, v[8:9] -// GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +// GFX1250: v_fmamk_f64 v[4:5], 0x405ec66666666666, 0x405ec66666666666, v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmamk_f64 v[4:5], 1.0, 1.0, v[6:7] @@ -595,7 +596,7 @@ v_fmaak_f64 v[6:7], v[254:255], v[8:9], 0x405ec00000000000 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmaak_f64 v[6:7], s[2:3], v[8:9], 0x405ec00012345678 -// GFX1250: v_fmaak_f64 v[6:7], s[2:3], v[8:9], lit64(0x405ec00012345678) ; encoding: [0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +// GFX1250: v_fmaak_f64 v[6:7], s[2:3], v[8:9], 0x405ec00012345678 ; encoding: [0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmaak_f64 v[6:7], vcc, v[8:9], 0x405ec000 @@ -631,27 +632,28 @@ v_fmaak_f64 v[254:255], 0x405ec00000000000, v[254:255], 0x405ec00000000000 // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmaak_f64 v[254:255], 0x405ec00012345678, v[254:255], 0x405ec00012345678 -// GFX1250: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit64(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +// GFX1250: v_fmaak_f64 v[254:255], 0x405ec00012345678, v[254:255], 0x405ec00012345678 ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit(0x405ec00012345678) -// GFX1250: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit64(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] -// GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250-ASM: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +// GFX1250-DIS: v_fmaak_f64 v[254:255], 0x405ec00012345678, v[254:255], 0x405ec00012345678 ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +// GFX1200-ERR: :[[@LINE-3]]:1: error: instruction not supported on this GPU v_fmaak_f64 v[254:255], 123.0, v[2:3], 0x405ec000 // GFX1250: v_fmaak_f64 v[254:255], 0x405ec000, v[2:3], 0x405ec000 ; encoding: [0xfe,0x04,0xfc,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmaak_f64 v[4:5], v[2:3], v[2:3], 123.1 -// GFX1250: v_fmaak_f64 v[4:5], v[2:3], v[2:3], lit64(0x405ec66666666666) ; encoding: [0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +// GFX1250: v_fmaak_f64 v[4:5], v[2:3], v[2:3], 0x405ec66666666666 ; encoding: [0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmaak_f64 v[4:5], 0x405ec66666666666, v[6:7], 123.1 -// GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[6:7], lit64(0x405ec66666666666) ; encoding: [0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +// GFX1250: v_fmaak_f64 v[4:5], 0x405ec66666666666, v[6:7], 0x405ec66666666666 ; encoding: [0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmaak_f64 v[4:5], 123.1, v[8:9], 123.1 -// GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[8:9], lit64(0x405ec66666666666) ; encoding: [0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +// GFX1250: v_fmaak_f64 v[4:5], 0x405ec66666666666, v[8:9], 0x405ec66666666666 ; encoding: [0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] // GFX1200-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU v_fmaak_f64 v[4:5], 1.0, v[8:9], 1.0 diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2_err.s index 9f50361..a83d84f 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop2_err.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop2_err.s @@ -1,7 +1,7 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX1250-ERR --implicit-check-not=error: --strict-whitespace %s v_add_f64 v[1:2], v[1:2], v[1:2] -// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_fmaak_f32 v4, v2, v6, 3 row_share:1 // GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s index 949847e..ad5771b 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s @@ -1,6 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefixes=GFX12,GFX1200 %s // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX12,GFX1250 %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s s_alloc_vgpr 0x1235 // GFX12: s_alloc_vgpr 0x1235 ; encoding: [0xff,0x53,0x80,0xbe,0x35,0x12,0x00,0x00] @@ -859,7 +860,7 @@ s_mov_b64 s[0:1], 0x3f717273 s_mov_b64 s[0:1], 0xaf123456 // GFX1200: s_mov_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_mov_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_mov_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_mov_b64 s[0:1], null // GFX12: s_mov_b64 s[0:1], null ; encoding: [0x7c,0x01,0x80,0xbe] @@ -968,7 +969,7 @@ s_cmov_b64 s[0:1], 0x3f717273 s_cmov_b64 s[0:1], 0xaf123456 // GFX1200: s_cmov_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_cmov_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_cmov_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_not_b32 s0, s1 // GFX12: s_not_b32 s0, s1 ; encoding: [0x01,0x1e,0x80,0xbe] @@ -1071,7 +1072,7 @@ s_not_b64 s[0:1], 0x3f717273 s_not_b64 s[0:1], 0xaf123456 // GFX1200: s_not_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_not_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_not_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_wqm_b32 s0, s1 // GFX12: s_wqm_b32 s0, s1 ; encoding: [0x01,0x1c,0x80,0xbe] @@ -1174,7 +1175,7 @@ s_wqm_b64 s[0:1], 0x3f717273 s_wqm_b64 s[0:1], 0xaf123456 // GFX1200: s_wqm_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_wqm_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_wqm_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_brev_b32 s0, s1 // GFX12: s_brev_b32 s0, s1 ; encoding: [0x01,0x04,0x80,0xbe] @@ -1277,7 +1278,7 @@ s_brev_b64 s[0:1], 0x3f717273 s_brev_b64 s[0:1], 0xaf123456 // GFX1200: s_brev_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_brev_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_brev_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_bcnt0_i32_b32 s0, s1 // GFX12: s_bcnt0_i32_b32 s0, s1 ; encoding: [0x01,0x16,0x80,0xbe] @@ -1389,7 +1390,7 @@ s_bcnt0_i32_b64 s0, 0x3f717273 s_bcnt0_i32_b64 s0, 0xaf123456 // GFX1200: s_bcnt0_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_bcnt0_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_bcnt0_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_bcnt1_i32_b32 s0, s1 // GFX12: s_bcnt1_i32_b32 s0, s1 ; encoding: [0x01,0x18,0x80,0xbe] @@ -1501,7 +1502,7 @@ s_bcnt1_i32_b64 s0, 0x3f717273 s_bcnt1_i32_b64 s0, 0xaf123456 // GFX1200: s_bcnt1_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_bcnt1_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_bcnt1_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_ff1_i32_b32 s0, s1 // GFX12: s_ctz_i32_b32 s0, s1 ; encoding: [0x01,0x08,0x80,0xbe] @@ -1613,7 +1614,7 @@ s_ff1_i32_b64 s0, 0x3f717273 s_ff1_i32_b64 s0, 0xaf123456 // GFX1200: s_ctz_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_ctz_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_ctz_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_flbit_i32_b32 s0, s1 // GFX12: s_clz_i32_u32 s0, s1 ; encoding: [0x01,0x0a,0x80,0xbe] @@ -1725,7 +1726,7 @@ s_flbit_i32_b64 s0, 0x3f717273 s_flbit_i32_b64 s0, 0xaf123456 // GFX1200: s_clz_i32_u64 s0, 0xaf123456 ; encoding: [0xff,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_clz_i32_u64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_clz_i32_u64 s0, 0xaf123456 ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_flbit_i32 s0, s1 // GFX12: s_cls_i32 s0, s1 ; encoding: [0x01,0x0c,0x80,0xbe] @@ -1837,7 +1838,7 @@ s_flbit_i32_i64 s0, 0x3f717273 s_flbit_i32_i64 s0, 0xaf123456 // GFX1200: s_cls_i32_i64 s0, 0xaf123456 ; encoding: [0xff,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_cls_i32_i64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_cls_i32_i64 s0, 0xaf123456 ; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_sext_i32_i8 s0, s1 // GFX12: s_sext_i32_i8 s0, s1 ; encoding: [0x01,0x0e,0x80,0xbe] @@ -2283,7 +2284,7 @@ s_and_saveexec_b64 s[0:1], 0x3f717273 s_and_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_and_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_or_saveexec_b64 s[0:1], s[2:3] // GFX12: s_or_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x23,0x80,0xbe] @@ -2323,7 +2324,7 @@ s_or_saveexec_b64 s[0:1], 0x3f717273 s_or_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_or_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_or_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_xor_saveexec_b64 s[0:1], s[2:3] // GFX12: s_xor_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x25,0x80,0xbe] @@ -2363,7 +2364,7 @@ s_xor_saveexec_b64 s[0:1], 0x3f717273 s_xor_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_xor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_xor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_xor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_andn2_saveexec_b64 s[0:1], s[2:3] // GFX12: s_and_not1_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x31,0x80,0xbe] @@ -2403,7 +2404,7 @@ s_andn2_saveexec_b64 s[0:1], 0x3f717273 s_andn2_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_orn2_saveexec_b64 s[0:1], s[2:3] // GFX12: s_or_not1_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x33,0x80,0xbe] @@ -2443,7 +2444,7 @@ s_orn2_saveexec_b64 s[0:1], 0x3f717273 s_orn2_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not1_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_nand_saveexec_b64 s[0:1], s[2:3] // GFX12: s_nand_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x27,0x80,0xbe] @@ -2483,7 +2484,7 @@ s_nand_saveexec_b64 s[0:1], 0x3f717273 s_nand_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_nand_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_nand_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_nand_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_nor_saveexec_b64 s[0:1], s[2:3] // GFX12: s_nor_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x29,0x80,0xbe] @@ -2523,7 +2524,7 @@ s_nor_saveexec_b64 s[0:1], 0x3f717273 s_nor_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_nor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_nor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_nor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_xnor_saveexec_b64 s[0:1], s[2:3] // GFX12: s_xnor_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x2b,0x80,0xbe] @@ -2563,7 +2564,7 @@ s_xnor_saveexec_b64 s[0:1], 0x3f717273 s_xnor_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_xnor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_xnor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_xnor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_quadmask_b32 s0, s1 // GFX12: s_quadmask_b32 s0, s1 ; encoding: [0x01,0x1a,0x80,0xbe] @@ -2666,7 +2667,7 @@ s_quadmask_b64 s[0:1], 0x3f717273 s_quadmask_b64 s[0:1], 0xaf123456 // GFX1200: s_quadmask_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_quadmask_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_quadmask_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_movrels_b32 s0, s1 // GFX12: s_movrels_b32 s0, s1 ; encoding: [0x01,0x40,0x80,0xbe] @@ -2811,7 +2812,7 @@ s_movreld_b64 s[0:1], 0x3f717273 s_movreld_b64 s[0:1], 0xaf123456 // GFX1200: s_movreld_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_movreld_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_movreld_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_abs_i32 s0, s1 // GFX12: s_abs_i32 s0, s1 ; encoding: [0x01,0x15,0x80,0xbe] @@ -2911,7 +2912,7 @@ s_andn1_saveexec_b64 s[0:1], 0x3f717273 s_andn1_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not0_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_orn1_saveexec_b64 s[0:1], s[2:3] // GFX12: s_or_not0_saveexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x2f,0x80,0xbe] @@ -2951,7 +2952,7 @@ s_orn1_saveexec_b64 s[0:1], 0x3f717273 s_orn1_saveexec_b64 s[0:1], 0xaf123456 // GFX1200: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not0_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_andn1_wrexec_b64 s[0:1], s[2:3] // GFX12: s_and_not0_wrexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x35,0x80,0xbe] @@ -2991,7 +2992,7 @@ s_andn1_wrexec_b64 s[0:1], 0x3f717273 s_andn1_wrexec_b64 s[0:1], 0xaf123456 // GFX1200: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not0_wrexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_andn2_wrexec_b64 s[0:1], s[2:3] // GFX12: s_and_not1_wrexec_b64 s[0:1], s[2:3] ; encoding: [0x02,0x37,0x80,0xbe] @@ -3031,7 +3032,7 @@ s_andn2_wrexec_b64 s[0:1], 0x3f717273 s_andn2_wrexec_b64 s[0:1], 0xaf123456 // GFX1200: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_wrexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_bitreplicate_b64_b32 s[0:1], s2 // GFX12: s_bitreplicate_b64_b32 s[0:1], s2 ; encoding: [0x02,0x14,0x80,0xbe] @@ -3830,7 +3831,7 @@ s_ctz_i32_b64 exec_hi, src_scc s_ctz_i32_b64 null, 0xaf123456 // GFX1200: s_ctz_i32_b64 null, 0xaf123456 ; encoding: [0xff,0x09,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_ctz_i32_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x09,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_ctz_i32_b64 null, 0xaf123456 ; encoding: [0xfe,0x09,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_and_not1_saveexec_b64 s[10:11], s[2:3] // GFX12: s_and_not1_saveexec_b64 s[10:11], s[2:3] ; encoding: [0x02,0x31,0x8a,0xbe] @@ -3858,7 +3859,7 @@ s_and_not1_saveexec_b64 ttmp[14:15], src_scc s_and_not1_saveexec_b64 null, 0xaf123456 // GFX1200: s_and_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x31,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x31,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x31,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_and_not0_saveexec_b32 s5, s1 // GFX12: s_and_not0_saveexec_b32 s5, s1 ; encoding: [0x01,0x2c,0x85,0xbe] @@ -3919,7 +3920,7 @@ s_and_not0_saveexec_b64 ttmp[14:15], src_scc s_and_not0_saveexec_b64 null, 0xaf123456 // GFX1200: s_and_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x2d,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not0_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x2d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x2d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_and_not0_wrexec_b32 s5, s1 // GFX12: s_and_not0_wrexec_b32 s5, s1 ; encoding: [0x01,0x34,0x85,0xbe] @@ -3980,7 +3981,7 @@ s_and_not0_wrexec_b64 ttmp[14:15], src_scc s_and_not0_wrexec_b64 null, 0xaf123456 // GFX1200: s_and_not0_wrexec_b64 null, 0xaf123456 ; encoding: [0xff,0x35,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not0_wrexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x35,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_not0_wrexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x35,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_and_not1_saveexec_b32 s5, s1 // GFX12: s_and_not1_saveexec_b32 s5, s1 ; encoding: [0x01,0x30,0x85,0xbe] @@ -4074,7 +4075,7 @@ s_and_not1_wrexec_b64 ttmp[14:15], src_scc s_and_not1_wrexec_b64 null, 0xaf123456 // GFX1200: s_and_not1_wrexec_b64 null, 0xaf123456 ; encoding: [0xff,0x37,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_wrexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x37,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_not1_wrexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x37,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_cls_i32 s5, s1 // GFX12: s_cls_i32 s5, s1 ; encoding: [0x01,0x0c,0x85,0xbe] @@ -4144,7 +4145,7 @@ s_cls_i32_i64 exec_hi, src_scc s_cls_i32_i64 null, 0xaf123456 // GFX1200: s_cls_i32_i64 null, 0xaf123456 ; encoding: [0xff,0x0d,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_cls_i32_i64 null, lit64(0xaf123456) ; encoding: [0xfe,0x0d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_cls_i32_i64 null, 0xaf123456 ; encoding: [0xfe,0x0d,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_clz_i32_u32 s5, s1 // GFX12: s_clz_i32_u32 s5, s1 ; encoding: [0x01,0x0a,0x85,0xbe] @@ -4214,7 +4215,7 @@ s_clz_i32_u64 exec_hi, src_scc s_clz_i32_u64 null, 0xaf123456 // GFX1200: s_clz_i32_u64 null, 0xaf123456 ; encoding: [0xff,0x0b,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_clz_i32_u64 null, lit64(0xaf123456) ; encoding: [0xfe,0x0b,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_clz_i32_u64 null, 0xaf123456 ; encoding: [0xfe,0x0b,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_or_not0_saveexec_b32 s5, s1 // GFX12: s_or_not0_saveexec_b32 s5, s1 ; encoding: [0x01,0x2e,0x85,0xbe] @@ -4275,7 +4276,7 @@ s_or_not0_saveexec_b64 ttmp[14:15], src_scc s_or_not0_saveexec_b64 null, 0xaf123456 // GFX1200: s_or_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x2f,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not0_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x2f,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_or_not0_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x2f,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_or_not1_saveexec_b32 s5, s1 // GFX12: s_or_not1_saveexec_b32 s5, s1 ; encoding: [0x01,0x32,0x85,0xbe] @@ -4336,4 +4337,4 @@ s_or_not1_saveexec_b64 ttmp[14:15], src_scc s_or_not1_saveexec_b64 null, 0xaf123456 // GFX1200: s_or_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xff,0x33,0xfc,0xbe,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not1_saveexec_b64 null, lit64(0xaf123456) ; encoding: [0xfe,0x33,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_or_not1_saveexec_b64 null, 0xaf123456 ; encoding: [0xfe,0x33,0xfc,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s index 2ecec4c..9c83879 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop2.s @@ -1,6 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefixes=GFX12,GFX1200 %s // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefixes=GFX12,GFX1250 %s +// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s s_add_nc_u64 s[0:1], s[2:3], s[4:5] // GFX12: s_add_nc_u64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0xa9] @@ -55,7 +56,7 @@ s_add_nc_u64 s[0:1], 0x3f717273, s[2:3] s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] // GFX1200: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf] -// GFX1250: s_add_nc_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_add_nc_u64 s[0:1], s[2:3], exec // GFX12: s_add_nc_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0xa9] @@ -80,7 +81,7 @@ s_add_nc_u64 s[0:1], s[2:3], 0x3f717273 s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf] -// GFX1250: s_add_nc_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_sub_nc_u64 s[0:1], s[2:3], s[4:5] // GFX12: s_sub_nc_u64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x00,0xaa] @@ -135,7 +136,7 @@ s_sub_nc_u64 s[0:1], 0x3f717273, s[2:3] s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] // GFX1200: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf] -// GFX1250: s_sub_nc_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_sub_nc_u64 s[0:1], s[2:3], exec // GFX12: s_sub_nc_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x00,0xaa] @@ -160,7 +161,7 @@ s_sub_nc_u64 s[0:1], s[2:3], 0x3f717273 s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf] -// GFX1250: s_sub_nc_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_mul_u64 s[0:1], s[2:3], s[4:5] // GFX12: s_mul_u64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0xaa] @@ -215,7 +216,7 @@ s_mul_u64 s[0:1], 0x3f717273, s[2:3] s_mul_u64 s[0:1], 0xaf123456, s[2:3] // GFX1200: s_mul_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf] -// GFX1250: s_mul_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_mul_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_mul_u64 s[0:1], s[2:3], exec // GFX12: s_mul_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0xaa] @@ -240,7 +241,7 @@ s_mul_u64 s[0:1], s[2:3], 0x3f717273 s_mul_u64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_mul_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf] -// GFX1250: s_mul_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_mul_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_add_f32 s5, s1, s2 // GFX12: s_add_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0xa0] @@ -2358,7 +2359,7 @@ s_cselect_b64 s[0:1], 0x3f717273, s[4:5] s_cselect_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x98,0x56,0x34,0x12,0xaf] -// GFX1250: s_cselect_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_cselect_b64 s[0:1], s[2:3], exec // GFX12: s_cselect_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x98] @@ -2383,7 +2384,7 @@ s_cselect_b64 s[0:1], s[2:3], 0x3f717273 s_cselect_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x98,0x56,0x34,0x12,0xaf] -// GFX1250: s_cselect_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_and_b32 s0, s1, s2 // GFX12: s_and_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8b] @@ -2552,7 +2553,7 @@ s_and_b64 s[0:1], 0x3f717273, s[4:5] s_and_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_and_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_and_b64 s[0:1], s[2:3], exec // GFX12: s_and_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8b] @@ -2577,7 +2578,7 @@ s_and_b64 s[0:1], s[2:3], 0x3f717273 s_and_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_and_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8b,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_or_b32 s0, s1, s2 // GFX12: s_or_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8c] @@ -2737,7 +2738,7 @@ s_or_b64 s[0:1], 0x3f717273, s[4:5] s_or_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_or_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_or_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_or_b64 s[0:1], s[2:3], exec // GFX12: s_or_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8c] @@ -2762,7 +2763,7 @@ s_or_b64 s[0:1], s[2:3], 0x3f717273 s_or_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_or_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8c,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_or_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_xor_b32 s0, s1, s2 // GFX12: s_xor_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8d] @@ -2922,7 +2923,7 @@ s_xor_b64 s[0:1], 0x3f717273, s[4:5] s_xor_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_xor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf] -// GFX1250: s_xor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_xor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_xor_b64 s[0:1], s[2:3], exec // GFX12: s_xor_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8d] @@ -2947,7 +2948,7 @@ s_xor_b64 s[0:1], s[2:3], 0x3f717273 s_xor_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_xor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8d,0x56,0x34,0x12,0xaf] -// GFX1250: s_xor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_xor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_andn2_b32 s0, s1, s2 // GFX12: s_and_not1_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x91] @@ -3107,7 +3108,7 @@ s_andn2_b64 s[0:1], 0x3f717273, s[4:5] s_andn2_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x91,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_andn2_b64 s[0:1], s[2:3], exec // GFX12: s_and_not1_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x91] @@ -3132,7 +3133,7 @@ s_andn2_b64 s[0:1], s[2:3], 0x3f717273 s_andn2_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x91,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_orn2_b32 s0, s1, s2 // GFX12: s_or_not1_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x92] @@ -3292,7 +3293,7 @@ s_orn2_b64 s[0:1], 0x3f717273, s[4:5] s_orn2_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x92,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not1_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_orn2_b64 s[0:1], s[2:3], exec // GFX12: s_or_not1_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x92] @@ -3317,7 +3318,7 @@ s_orn2_b64 s[0:1], s[2:3], 0x3f717273 s_orn2_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x92,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not1_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_nand_b32 s0, s1, s2 // GFX12: s_nand_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8e] @@ -3477,7 +3478,7 @@ s_nand_b64 s[0:1], 0x3f717273, s[4:5] s_nand_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_nand_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf] -// GFX1250: s_nand_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_nand_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_nand_b64 s[0:1], s[2:3], exec // GFX12: s_nand_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8e] @@ -3502,7 +3503,7 @@ s_nand_b64 s[0:1], s[2:3], 0x3f717273 s_nand_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_nand_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8e,0x56,0x34,0x12,0xaf] -// GFX1250: s_nand_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_nand_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_nor_b32 s0, s1, s2 // GFX12: s_nor_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x8f] @@ -3662,7 +3663,7 @@ s_nor_b64 s[0:1], 0x3f717273, s[4:5] s_nor_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_nor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf] -// GFX1250: s_nor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_nor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_nor_b64 s[0:1], s[2:3], exec // GFX12: s_nor_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x8f] @@ -3687,7 +3688,7 @@ s_nor_b64 s[0:1], s[2:3], 0x3f717273 s_nor_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_nor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8f,0x56,0x34,0x12,0xaf] -// GFX1250: s_nor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_nor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_xnor_b32 s0, s1, s2 // GFX12: s_xnor_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x90] @@ -3847,7 +3848,7 @@ s_xnor_b64 s[0:1], 0x3f717273, s[4:5] s_xnor_b64 s[0:1], 0xaf123456, s[4:5] // GFX1200: s_xnor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x90,0x56,0x34,0x12,0xaf] -// GFX1250: s_xnor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_xnor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_xnor_b64 s[0:1], s[2:3], exec // GFX12: s_xnor_b64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0x90] @@ -3872,7 +3873,7 @@ s_xnor_b64 s[0:1], s[2:3], 0x3f717273 s_xnor_b64 s[0:1], s[2:3], 0xaf123456 // GFX1200: s_xnor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x90,0x56,0x34,0x12,0xaf] -// GFX1250: s_xnor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_xnor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_lshl_b32 s0, s1, s2 // GFX12: s_lshl_b32 s0, s1, s2 ; encoding: [0x01,0x02,0x00,0x84] @@ -4032,7 +4033,7 @@ s_lshl_b64 s[0:1], 0x3f717273, s4 s_lshl_b64 s[0:1], 0xaf123456, s4 // GFX1200: s_lshl_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x84,0x56,0x34,0x12,0xaf] -// GFX1250: s_lshl_b64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_lshl_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_lshl_b64 s[0:1], s[2:3], exec_lo // GFX12: s_lshl_b64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x84] @@ -4216,7 +4217,7 @@ s_lshr_b64 s[0:1], 0x3f717273, s4 s_lshr_b64 s[0:1], 0xaf123456, s4 // GFX1200: s_lshr_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x85,0x56,0x34,0x12,0xaf] -// GFX1250: s_lshr_b64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_lshr_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_lshr_b64 s[0:1], s[2:3], exec_lo // GFX12: s_lshr_b64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x85] @@ -4400,7 +4401,7 @@ s_ashr_i64 s[0:1], 0x3f717273, s4 s_ashr_i64 s[0:1], 0xaf123456, s4 // GFX1200: s_ashr_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x86,0x56,0x34,0x12,0xaf] -// GFX1250: s_ashr_i64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_ashr_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_ashr_i64 s[0:1], s[2:3], exec_lo // GFX12: s_ashr_i64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x86] @@ -4995,7 +4996,7 @@ s_bfe_u64 s[0:1], 0x3f717273, s4 s_bfe_u64 s[0:1], 0xaf123456, s4 // GFX1200: s_bfe_u64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x00,0x94,0x56,0x34,0x12,0xaf] -// GFX1250: s_bfe_u64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_bfe_u64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_bfe_u64 s[0:1], s[2:3], exec_lo // GFX12: s_bfe_u64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x00,0x94] @@ -5074,7 +5075,7 @@ s_bfe_i64 s[0:1], 0x3f717273, s4 s_bfe_i64 s[0:1], 0xaf123456, s4 // GFX1200: s_bfe_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x94,0x56,0x34,0x12,0xaf] -// GFX1250: s_bfe_i64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_bfe_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_bfe_i64 s[0:1], s[2:3], exec_lo // GFX12: s_bfe_i64 s[0:1], s[2:3], exec_lo ; encoding: [0x02,0x7e,0x80,0x94] @@ -6278,7 +6279,7 @@ s_and_not1_b64 s[10:11], vcc, ttmp[14:15] s_and_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 // GFX1200: s_and_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xff,0x8a,0x91,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_b64 s[10:11], ttmp[14:15], lit64(0xaf123456) ; encoding: [0x7a,0xfe,0x8a,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xfe,0x8a,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_and_not1_b64 s[10:11], exec, src_scc // GFX12: s_and_not1_b64 s[10:11], exec, src_scc ; encoding: [0x7e,0xfd,0x8a,0x91] @@ -6297,7 +6298,7 @@ s_and_not1_b64 exec, src_scc, exec s_and_not1_b64 null, 0xaf123456, vcc // GFX1200: s_and_not1_b64 null, 0xaf123456, vcc ; encoding: [0xff,0x6a,0xfc,0x91,0x56,0x34,0x12,0xaf] -// GFX1250: s_and_not1_b64 null, lit64(0xaf123456), vcc ; encoding: [0xfe,0x6a,0xfc,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_and_not1_b64 null, 0xaf123456, vcc ; encoding: [0xfe,0x6a,0xfc,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_or_not1_b64 s[10:11], s[2:3], s[4:5] // GFX12: s_or_not1_b64 s[10:11], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x8a,0x92] @@ -6310,7 +6311,7 @@ s_or_not1_b64 s[10:11], vcc, ttmp[14:15] s_or_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 // GFX1200: s_or_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xff,0x8a,0x92,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not1_b64 s[10:11], ttmp[14:15], lit64(0xaf123456) ; encoding: [0x7a,0xfe,0x8a,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_or_not1_b64 s[10:11], ttmp[14:15], 0xaf123456 ; encoding: [0x7a,0xfe,0x8a,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_or_not1_b64 s[10:11], exec, src_scc // GFX12: s_or_not1_b64 s[10:11], exec, src_scc ; encoding: [0x7e,0xfd,0x8a,0x92] @@ -6329,4 +6330,4 @@ s_or_not1_b64 exec, src_scc, exec s_or_not1_b64 null, 0xaf123456, vcc // GFX1200: s_or_not1_b64 null, 0xaf123456, vcc ; encoding: [0xff,0x6a,0xfc,0x92,0x56,0x34,0x12,0xaf] -// GFX1250: s_or_not1_b64 null, lit64(0xaf123456), vcc ; encoding: [0xfe,0x6a,0xfc,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_or_not1_b64 null, 0xaf123456, vcc ; encoding: [0xfe,0x6a,0xfc,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s b/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s index cedba66d..98bb3c3 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_sopc.s @@ -1,6 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck --check-prefixes=GFX12,GFX1200 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefixes=GFX12,GFX1250 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s s_cmp_lt_f32 s1, s2 // GFX12: s_cmp_lt_f32 s1, s2 ; encoding: [0x01,0x02,0x41,0xbf] @@ -2119,7 +2120,7 @@ s_cmp_eq_u64 s[0:1], 0x3f717273 s_cmp_eq_u64 s[0:1], 0xaf123456 // GFX1200: s_cmp_eq_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xff,0x10,0xbf,0x56,0x34,0x12,0xaf] -// GFX1250: s_cmp_eq_u64 s[0:1], lit64(0xaf123456) ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_cmp_eq_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] s_cmp_lg_u64 s[0:1], s[2:3] // GFX12: s_cmp_lg_u64 s[0:1], s[2:3] ; encoding: [0x00,0x02,0x11,0xbf] @@ -2162,4 +2163,4 @@ s_cmp_lg_u64 s[0:1], 0x3f717273 s_cmp_lg_u64 s[0:1], 0xaf123456 // GFX1200: s_cmp_lg_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xff,0x11,0xbf,0x56,0x34,0x12,0xaf] -// GFX1250: s_cmp_lg_u64 s[0:1], lit64(0xaf123456) ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +// GFX1250: s_cmp_lg_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s index 43673d1..c96a72d 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s +++ b/llvm/test/MC/AMDGPU/gfx90a_ldst_acc.s @@ -2,707 +2,707 @@ // RUN: llvm-mc -triple=amdgcn -mcpu=gfx90a -show-encoding %s | FileCheck --check-prefix=GFX90A %s // GFX90A: flat_load_ubyte a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x40,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte a5, v[2:3] offset:4095 // GFX90A: flat_load_ubyte a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x40,0xdc,0x02,0x00,0x80,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte a255, v[2:3] offset:4095 // GFX90A: flat_load_ubyte a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x40,0xdc,0xfe,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte a5, v[254:255] offset:4095 // GFX90A: flat_load_ubyte a5, v[2:3] ; encoding: [0x00,0x00,0x40,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte a5, v[2:3] // GFX90A: flat_load_ubyte a5, v[2:3] ; encoding: [0x00,0x00,0x40,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte a5, v[2:3] // GFX90A: flat_load_ubyte a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x40,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte a5, v[2:3] offset:7 // GFX90A: flat_load_ubyte a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x41,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte a5, v[2:3] offset:4095 glc // GFX90A: flat_load_ubyte a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x42,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte a5, v[2:3] offset:4095 slc // GFX90A: flat_load_sbyte a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x44,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte a5, v[2:3] offset:4095 // GFX90A: flat_load_sbyte a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x44,0xdc,0x02,0x00,0x80,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte a255, v[2:3] offset:4095 // GFX90A: flat_load_sbyte a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x44,0xdc,0xfe,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte a5, v[254:255] offset:4095 // GFX90A: flat_load_sbyte a5, v[2:3] ; encoding: [0x00,0x00,0x44,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte a5, v[2:3] // GFX90A: flat_load_sbyte a5, v[2:3] ; encoding: [0x00,0x00,0x44,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte a5, v[2:3] // GFX90A: flat_load_sbyte a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x44,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte a5, v[2:3] offset:7 // GFX90A: flat_load_sbyte a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x45,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte a5, v[2:3] offset:4095 glc // GFX90A: flat_load_sbyte a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x46,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte a5, v[2:3] offset:4095 slc // GFX90A: flat_load_ushort a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x48,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ushort a5, v[2:3] offset:4095 // GFX90A: flat_load_ushort a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x48,0xdc,0x02,0x00,0x80,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ushort a255, v[2:3] offset:4095 // GFX90A: flat_load_ushort a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x48,0xdc,0xfe,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ushort a5, v[254:255] offset:4095 // GFX90A: flat_load_ushort a5, v[2:3] ; encoding: [0x00,0x00,0x48,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ushort a5, v[2:3] // GFX90A: flat_load_ushort a5, v[2:3] ; encoding: [0x00,0x00,0x48,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ushort a5, v[2:3] // GFX90A: flat_load_ushort a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x48,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ushort a5, v[2:3] offset:7 // GFX90A: flat_load_ushort a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x49,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ushort a5, v[2:3] offset:4095 glc // GFX90A: flat_load_ushort a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x4a,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ushort a5, v[2:3] offset:4095 slc // GFX90A: flat_load_sshort a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x4c,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sshort a5, v[2:3] offset:4095 // GFX90A: flat_load_sshort a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x4c,0xdc,0x02,0x00,0x80,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sshort a255, v[2:3] offset:4095 // GFX90A: flat_load_sshort a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x4c,0xdc,0xfe,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sshort a5, v[254:255] offset:4095 // GFX90A: flat_load_sshort a5, v[2:3] ; encoding: [0x00,0x00,0x4c,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sshort a5, v[2:3] // GFX90A: flat_load_sshort a5, v[2:3] ; encoding: [0x00,0x00,0x4c,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sshort a5, v[2:3] // GFX90A: flat_load_sshort a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x4c,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sshort a5, v[2:3] offset:7 // GFX90A: flat_load_sshort a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x4d,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sshort a5, v[2:3] offset:4095 glc // GFX90A: flat_load_sshort a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x4e,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sshort a5, v[2:3] offset:4095 slc // GFX90A: flat_load_dword a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dword a5, v[2:3] offset:4095 // GFX90A: flat_load_dword a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0x02,0x00,0x80,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dword a255, v[2:3] offset:4095 // GFX90A: flat_load_dword a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x50,0xdc,0xfe,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dword a5, v[254:255] offset:4095 // GFX90A: flat_load_dword a5, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dword a5, v[2:3] // GFX90A: flat_load_dword a5, v[2:3] ; encoding: [0x00,0x00,0x50,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dword a5, v[2:3] // GFX90A: flat_load_dword a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x50,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dword a5, v[2:3] offset:7 // GFX90A: flat_load_dword a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x51,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dword a5, v[2:3] offset:4095 glc // GFX90A: flat_load_dword a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x52,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dword a5, v[2:3] offset:4095 slc // GFX90A: flat_load_dwordx2 a[6:7], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x54,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx2 a[6:7], v[2:3] offset:4095 // GFX90A: flat_load_dwordx2 a[254:255], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x54,0xdc,0x02,0x00,0x80,0xfe] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx2 a[254:255], v[2:3] offset:4095 // GFX90A: flat_load_dwordx2 a[6:7], v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x54,0xdc,0xfe,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx2 a[6:7], v[254:255] offset:4095 // GFX90A: flat_load_dwordx2 a[6:7], v[2:3] ; encoding: [0x00,0x00,0x54,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx2 a[6:7], v[2:3] // GFX90A: flat_load_dwordx2 a[6:7], v[2:3] ; encoding: [0x00,0x00,0x54,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx2 a[6:7], v[2:3] // GFX90A: flat_load_dwordx2 a[6:7], v[2:3] offset:7 ; encoding: [0x07,0x00,0x54,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx2 a[6:7], v[2:3] offset:7 // GFX90A: flat_load_dwordx2 a[6:7], v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x55,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx2 a[6:7], v[2:3] offset:4095 glc // GFX90A: flat_load_dwordx2 a[6:7], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x56,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx2 a[6:7], v[2:3] offset:4095 slc // GFX90A: flat_load_dwordx3 a[6:8], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x58,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx3 a[6:8], v[2:3] offset:4095 // GFX90A: flat_load_dwordx3 a[252:254], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x58,0xdc,0x02,0x00,0x80,0xfc] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx3 a[252:254], v[2:3] offset:4095 // GFX90A: flat_load_dwordx3 a[6:8], v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x58,0xdc,0xfe,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx3 a[6:8], v[254:255] offset:4095 // GFX90A: flat_load_dwordx3 a[6:8], v[2:3] ; encoding: [0x00,0x00,0x58,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx3 a[6:8], v[2:3] // GFX90A: flat_load_dwordx3 a[6:8], v[2:3] ; encoding: [0x00,0x00,0x58,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx3 a[6:8], v[2:3] // GFX90A: flat_load_dwordx3 a[6:8], v[2:3] offset:7 ; encoding: [0x07,0x00,0x58,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx3 a[6:8], v[2:3] offset:7 // GFX90A: flat_load_dwordx3 a[6:8], v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x59,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx3 a[6:8], v[2:3] offset:4095 glc // GFX90A: flat_load_dwordx3 a[6:8], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x5a,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx3 a[6:8], v[2:3] offset:4095 slc // GFX90A: flat_load_dwordx4 a[6:9], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x5c,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx4 a[6:9], v[2:3] offset:4095 // GFX90A: flat_load_dwordx4 a[252:255], v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x5c,0xdc,0x02,0x00,0x80,0xfc] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx4 a[252:255], v[2:3] offset:4095 // GFX90A: flat_load_dwordx4 a[6:9], v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x5c,0xdc,0xfe,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx4 a[6:9], v[254:255] offset:4095 // GFX90A: flat_load_dwordx4 a[6:9], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx4 a[6:9], v[2:3] // GFX90A: flat_load_dwordx4 a[6:9], v[2:3] ; encoding: [0x00,0x00,0x5c,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx4 a[6:9], v[2:3] // GFX90A: flat_load_dwordx4 a[6:9], v[2:3] offset:7 ; encoding: [0x07,0x00,0x5c,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx4 a[6:9], v[2:3] offset:7 // GFX90A: flat_load_dwordx4 a[6:9], v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x5d,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx4 a[6:9], v[2:3] offset:4095 glc // GFX90A: flat_load_dwordx4 a[6:9], v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x5e,0xdc,0x02,0x00,0x80,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_dwordx4 a[6:9], v[2:3] offset:4095 slc // GFX90A: flat_store_byte v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x60,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte v[2:3], a2 offset:4095 // GFX90A: flat_store_byte v[254:255], a2 offset:4095 ; encoding: [0xff,0x0f,0x60,0xdc,0xfe,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte v[254:255], a2 offset:4095 // GFX90A: flat_store_byte v[2:3], a255 offset:4095 ; encoding: [0xff,0x0f,0x60,0xdc,0x02,0xff,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte v[2:3], a255 offset:4095 // GFX90A: flat_store_byte v[2:3], a2 ; encoding: [0x00,0x00,0x60,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte v[2:3], a2 // GFX90A: flat_store_byte v[2:3], a2 ; encoding: [0x00,0x00,0x60,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte v[2:3], a2 // GFX90A: flat_store_byte v[2:3], a2 offset:7 ; encoding: [0x07,0x00,0x60,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte v[2:3], a2 offset:7 // GFX90A: flat_store_byte v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x61,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte v[2:3], a2 offset:4095 glc // GFX90A: flat_store_byte v[2:3], a2 offset:4095 slc ; encoding: [0xff,0x0f,0x62,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte v[2:3], a2 offset:4095 slc // GFX90A: flat_store_byte_d16_hi v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x64,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte_d16_hi v[2:3], a2 offset:4095 // GFX90A: flat_store_byte_d16_hi v[254:255], a2 offset:4095 ; encoding: [0xff,0x0f,0x64,0xdc,0xfe,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte_d16_hi v[254:255], a2 offset:4095 // GFX90A: flat_store_byte_d16_hi v[2:3], a255 offset:4095 ; encoding: [0xff,0x0f,0x64,0xdc,0x02,0xff,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte_d16_hi v[2:3], a255 offset:4095 // GFX90A: flat_store_byte_d16_hi v[2:3], a2 ; encoding: [0x00,0x00,0x64,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte_d16_hi v[2:3], a2 // GFX90A: flat_store_byte_d16_hi v[2:3], a2 ; encoding: [0x00,0x00,0x64,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte_d16_hi v[2:3], a2 // GFX90A: flat_store_byte_d16_hi v[2:3], a2 offset:7 ; encoding: [0x07,0x00,0x64,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte_d16_hi v[2:3], a2 offset:7 // GFX90A: flat_store_byte_d16_hi v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x65,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte_d16_hi v[2:3], a2 offset:4095 glc // GFX90A: flat_store_byte_d16_hi v[2:3], a2 offset:4095 slc ; encoding: [0xff,0x0f,0x66,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_byte_d16_hi v[2:3], a2 offset:4095 slc // GFX90A: flat_store_short v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x68,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short v[2:3], a2 offset:4095 // GFX90A: flat_store_short v[254:255], a2 offset:4095 ; encoding: [0xff,0x0f,0x68,0xdc,0xfe,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short v[254:255], a2 offset:4095 // GFX90A: flat_store_short v[2:3], a255 offset:4095 ; encoding: [0xff,0x0f,0x68,0xdc,0x02,0xff,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short v[2:3], a255 offset:4095 // GFX90A: flat_store_short v[2:3], a2 ; encoding: [0x00,0x00,0x68,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short v[2:3], a2 // GFX90A: flat_store_short v[2:3], a2 ; encoding: [0x00,0x00,0x68,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short v[2:3], a2 // GFX90A: flat_store_short v[2:3], a2 offset:7 ; encoding: [0x07,0x00,0x68,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short v[2:3], a2 offset:7 // GFX90A: flat_store_short v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x69,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short v[2:3], a2 offset:4095 glc // GFX90A: flat_store_short v[2:3], a2 offset:4095 slc ; encoding: [0xff,0x0f,0x6a,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short v[2:3], a2 offset:4095 slc // GFX90A: flat_store_short_d16_hi v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short_d16_hi v[2:3], a2 offset:4095 // GFX90A: flat_store_short_d16_hi v[254:255], a2 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xdc,0xfe,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short_d16_hi v[254:255], a2 offset:4095 // GFX90A: flat_store_short_d16_hi v[2:3], a255 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xdc,0x02,0xff,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short_d16_hi v[2:3], a255 offset:4095 // GFX90A: flat_store_short_d16_hi v[2:3], a2 ; encoding: [0x00,0x00,0x6c,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short_d16_hi v[2:3], a2 // GFX90A: flat_store_short_d16_hi v[2:3], a2 ; encoding: [0x00,0x00,0x6c,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short_d16_hi v[2:3], a2 // GFX90A: flat_store_short_d16_hi v[2:3], a2 offset:7 ; encoding: [0x07,0x00,0x6c,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short_d16_hi v[2:3], a2 offset:7 // GFX90A: flat_store_short_d16_hi v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x6d,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short_d16_hi v[2:3], a2 offset:4095 glc // GFX90A: flat_store_short_d16_hi v[2:3], a2 offset:4095 slc ; encoding: [0xff,0x0f,0x6e,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_short_d16_hi v[2:3], a2 offset:4095 slc // GFX90A: flat_store_dword v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x70,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dword v[2:3], a2 offset:4095 // GFX90A: flat_store_dword v[254:255], a2 offset:4095 ; encoding: [0xff,0x0f,0x70,0xdc,0xfe,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dword v[254:255], a2 offset:4095 // GFX90A: flat_store_dword v[2:3], a255 offset:4095 ; encoding: [0xff,0x0f,0x70,0xdc,0x02,0xff,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dword v[2:3], a255 offset:4095 // GFX90A: flat_store_dword v[2:3], a2 ; encoding: [0x00,0x00,0x70,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dword v[2:3], a2 // GFX90A: flat_store_dword v[2:3], a2 ; encoding: [0x00,0x00,0x70,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dword v[2:3], a2 // GFX90A: flat_store_dword v[2:3], a2 offset:7 ; encoding: [0x07,0x00,0x70,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dword v[2:3], a2 offset:7 // GFX90A: flat_store_dword v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x71,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dword v[2:3], a2 offset:4095 glc // GFX90A: flat_store_dword v[2:3], a2 offset:4095 slc ; encoding: [0xff,0x0f,0x72,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dword v[2:3], a2 offset:4095 slc // GFX90A: flat_store_dwordx2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x74,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx2 v[2:3], a[2:3] offset:4095 // GFX90A: flat_store_dwordx2 v[254:255], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x74,0xdc,0xfe,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx2 v[254:255], a[2:3] offset:4095 // GFX90A: flat_store_dwordx2 v[2:3], a[254:255] offset:4095 ; encoding: [0xff,0x0f,0x74,0xdc,0x02,0xfe,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx2 v[2:3], a[254:255] offset:4095 // GFX90A: flat_store_dwordx2 v[2:3], a[2:3] ; encoding: [0x00,0x00,0x74,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx2 v[2:3], a[2:3] // GFX90A: flat_store_dwordx2 v[2:3], a[2:3] ; encoding: [0x00,0x00,0x74,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx2 v[2:3], a[2:3] // GFX90A: flat_store_dwordx2 v[2:3], a[2:3] offset:7 ; encoding: [0x07,0x00,0x74,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx2 v[2:3], a[2:3] offset:7 // GFX90A: flat_store_dwordx2 v[2:3], a[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x75,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx2 v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_store_dwordx2 v[2:3], a[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x76,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx2 v[2:3], a[2:3] offset:4095 slc // GFX90A: flat_store_dwordx3 v[2:3], a[2:4] offset:4095 ; encoding: [0xff,0x0f,0x78,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx3 v[2:3], a[2:4] offset:4095 // GFX90A: flat_store_dwordx3 v[254:255], a[2:4] offset:4095 ; encoding: [0xff,0x0f,0x78,0xdc,0xfe,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx3 v[254:255], a[2:4] offset:4095 // GFX90A: flat_store_dwordx3 v[2:3], a[252:254] offset:4095 ; encoding: [0xff,0x0f,0x78,0xdc,0x02,0xfc,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx3 v[2:3], a[252:254] offset:4095 // GFX90A: flat_store_dwordx3 v[2:3], a[2:4] ; encoding: [0x00,0x00,0x78,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx3 v[2:3], a[2:4] // GFX90A: flat_store_dwordx3 v[2:3], a[2:4] ; encoding: [0x00,0x00,0x78,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx3 v[2:3], a[2:4] // GFX90A: flat_store_dwordx3 v[2:3], a[2:4] offset:7 ; encoding: [0x07,0x00,0x78,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx3 v[2:3], a[2:4] offset:7 // GFX90A: flat_store_dwordx3 v[2:3], a[2:4] offset:4095 glc ; encoding: [0xff,0x0f,0x79,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx3 v[2:3], a[2:4] offset:4095 glc // GFX90A: flat_store_dwordx3 v[2:3], a[2:4] offset:4095 slc ; encoding: [0xff,0x0f,0x7a,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx3 v[2:3], a[2:4] offset:4095 slc // GFX90A: flat_store_dwordx4 v[2:3], a[2:5] offset:4095 ; encoding: [0xff,0x0f,0x7c,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx4 v[2:3], a[2:5] offset:4095 // GFX90A: flat_store_dwordx4 v[254:255], a[2:5] offset:4095 ; encoding: [0xff,0x0f,0x7c,0xdc,0xfe,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx4 v[254:255], a[2:5] offset:4095 // GFX90A: flat_store_dwordx4 v[2:3], a[252:255] offset:4095 ; encoding: [0xff,0x0f,0x7c,0xdc,0x02,0xfc,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx4 v[2:3], a[252:255] offset:4095 // GFX90A: flat_store_dwordx4 v[2:3], a[2:5] ; encoding: [0x00,0x00,0x7c,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx4 v[2:3], a[2:5] // GFX90A: flat_store_dwordx4 v[2:3], a[2:5] ; encoding: [0x00,0x00,0x7c,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx4 v[2:3], a[2:5] // GFX90A: flat_store_dwordx4 v[2:3], a[2:5] offset:7 ; encoding: [0x07,0x00,0x7c,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx4 v[2:3], a[2:5] offset:7 // GFX90A: flat_store_dwordx4 v[2:3], a[2:5] offset:4095 glc ; encoding: [0xff,0x0f,0x7d,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx4 v[2:3], a[2:5] offset:4095 glc // GFX90A: flat_store_dwordx4 v[2:3], a[2:5] offset:4095 slc ; encoding: [0xff,0x0f,0x7e,0xdc,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_store_dwordx4 v[2:3], a[2:5] offset:4095 slc // GFX90A: flat_load_ubyte_d16 a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x80,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16 a5, v[2:3] offset:4095 // GFX90A: flat_load_ubyte_d16 a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x80,0xdc,0x02,0x00,0x80,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16 a255, v[2:3] offset:4095 // GFX90A: flat_load_ubyte_d16 a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x80,0xdc,0xfe,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16 a5, v[254:255] offset:4095 // GFX90A: flat_load_ubyte_d16 a5, v[2:3] ; encoding: [0x00,0x00,0x80,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16 a5, v[2:3] // GFX90A: flat_load_ubyte_d16 a5, v[2:3] ; encoding: [0x00,0x00,0x80,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16 a5, v[2:3] // GFX90A: flat_load_ubyte_d16 a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x80,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16 a5, v[2:3] offset:7 // GFX90A: flat_load_ubyte_d16 a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x81,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16 a5, v[2:3] offset:4095 glc // GFX90A: flat_load_ubyte_d16 a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x82,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16 a5, v[2:3] offset:4095 slc // GFX90A: flat_load_ubyte_d16_hi a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x84,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16_hi a5, v[2:3] offset:4095 // GFX90A: flat_load_ubyte_d16_hi a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x84,0xdc,0x02,0x00,0x80,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16_hi a255, v[2:3] offset:4095 // GFX90A: flat_load_ubyte_d16_hi a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x84,0xdc,0xfe,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16_hi a5, v[254:255] offset:4095 // GFX90A: flat_load_ubyte_d16_hi a5, v[2:3] ; encoding: [0x00,0x00,0x84,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16_hi a5, v[2:3] // GFX90A: flat_load_ubyte_d16_hi a5, v[2:3] ; encoding: [0x00,0x00,0x84,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16_hi a5, v[2:3] // GFX90A: flat_load_ubyte_d16_hi a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x84,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16_hi a5, v[2:3] offset:7 // GFX90A: flat_load_ubyte_d16_hi a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x85,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16_hi a5, v[2:3] offset:4095 glc // GFX90A: flat_load_ubyte_d16_hi a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x86,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_ubyte_d16_hi a5, v[2:3] offset:4095 slc // GFX90A: flat_load_sbyte_d16 a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x88,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16 a5, v[2:3] offset:4095 // GFX90A: flat_load_sbyte_d16 a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x88,0xdc,0x02,0x00,0x80,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16 a255, v[2:3] offset:4095 // GFX90A: flat_load_sbyte_d16 a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x88,0xdc,0xfe,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16 a5, v[254:255] offset:4095 // GFX90A: flat_load_sbyte_d16 a5, v[2:3] ; encoding: [0x00,0x00,0x88,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16 a5, v[2:3] // GFX90A: flat_load_sbyte_d16 a5, v[2:3] ; encoding: [0x00,0x00,0x88,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16 a5, v[2:3] // GFX90A: flat_load_sbyte_d16 a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x88,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16 a5, v[2:3] offset:7 // GFX90A: flat_load_sbyte_d16 a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x89,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16 a5, v[2:3] offset:4095 glc // GFX90A: flat_load_sbyte_d16 a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x8a,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16 a5, v[2:3] offset:4095 slc // GFX90A: flat_load_sbyte_d16_hi a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x8c,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16_hi a5, v[2:3] offset:4095 // GFX90A: flat_load_sbyte_d16_hi a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x8c,0xdc,0x02,0x00,0x80,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16_hi a255, v[2:3] offset:4095 // GFX90A: flat_load_sbyte_d16_hi a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x8c,0xdc,0xfe,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16_hi a5, v[254:255] offset:4095 // GFX90A: flat_load_sbyte_d16_hi a5, v[2:3] ; encoding: [0x00,0x00,0x8c,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16_hi a5, v[2:3] // GFX90A: flat_load_sbyte_d16_hi a5, v[2:3] ; encoding: [0x00,0x00,0x8c,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16_hi a5, v[2:3] // GFX90A: flat_load_sbyte_d16_hi a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x8c,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16_hi a5, v[2:3] offset:7 // GFX90A: flat_load_sbyte_d16_hi a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x8d,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16_hi a5, v[2:3] offset:4095 glc // GFX90A: flat_load_sbyte_d16_hi a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x8e,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_sbyte_d16_hi a5, v[2:3] offset:4095 slc // GFX90A: flat_load_short_d16 a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x90,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16 a5, v[2:3] offset:4095 // GFX90A: flat_load_short_d16 a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x90,0xdc,0x02,0x00,0x80,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16 a255, v[2:3] offset:4095 // GFX90A: flat_load_short_d16 a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x90,0xdc,0xfe,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16 a5, v[254:255] offset:4095 // GFX90A: flat_load_short_d16 a5, v[2:3] ; encoding: [0x00,0x00,0x90,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16 a5, v[2:3] // GFX90A: flat_load_short_d16 a5, v[2:3] ; encoding: [0x00,0x00,0x90,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16 a5, v[2:3] // GFX90A: flat_load_short_d16 a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x90,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16 a5, v[2:3] offset:7 // GFX90A: flat_load_short_d16 a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x91,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16 a5, v[2:3] offset:4095 glc // GFX90A: flat_load_short_d16 a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x92,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16 a5, v[2:3] offset:4095 slc // GFX90A: flat_load_short_d16_hi a5, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x94,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16_hi a5, v[2:3] offset:4095 // GFX90A: flat_load_short_d16_hi a255, v[2:3] offset:4095 ; encoding: [0xff,0x0f,0x94,0xdc,0x02,0x00,0x80,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16_hi a255, v[2:3] offset:4095 // GFX90A: flat_load_short_d16_hi a5, v[254:255] offset:4095 ; encoding: [0xff,0x0f,0x94,0xdc,0xfe,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16_hi a5, v[254:255] offset:4095 // GFX90A: flat_load_short_d16_hi a5, v[2:3] ; encoding: [0x00,0x00,0x94,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16_hi a5, v[2:3] // GFX90A: flat_load_short_d16_hi a5, v[2:3] ; encoding: [0x00,0x00,0x94,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16_hi a5, v[2:3] // GFX90A: flat_load_short_d16_hi a5, v[2:3] offset:7 ; encoding: [0x07,0x00,0x94,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16_hi a5, v[2:3] offset:7 // GFX90A: flat_load_short_d16_hi a5, v[2:3] offset:4095 glc ; encoding: [0xff,0x0f,0x95,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16_hi a5, v[2:3] offset:4095 glc // GFX90A: flat_load_short_d16_hi a5, v[2:3] offset:4095 slc ; encoding: [0xff,0x0f,0x96,0xdc,0x02,0x00,0x80,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_load_short_d16_hi a5, v[2:3] offset:4095 slc // GFX90A: flat_atomic_swap a0, v[2:3], a2 offset:4095 glc ; encoding: [0xff,0x0f,0x01,0xdd,0x02,0x02,0x80,0x00] @@ -810,371 +810,371 @@ flat_atomic_inc_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc flat_atomic_dec_x2 a[0:1], v[2:3], a[2:3] offset:4095 glc // GFX90A: flat_atomic_swap v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x00,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_swap v[2:3], a2 offset:4095 // GFX90A: flat_atomic_cmpswap v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x04,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_cmpswap v[2:3], a[2:3] offset:4095 // GFX90A: flat_atomic_add v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x08,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_add v[2:3], a2 offset:4095 // GFX90A: flat_atomic_sub v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_sub v[2:3], a2 offset:4095 // GFX90A: flat_atomic_smin v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x10,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_smin v[2:3], a2 offset:4095 // GFX90A: flat_atomic_umin v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x14,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_umin v[2:3], a2 offset:4095 // GFX90A: flat_atomic_smax v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x18,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_smax v[2:3], a2 offset:4095 // GFX90A: flat_atomic_umax v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_umax v[2:3], a2 offset:4095 // GFX90A: flat_atomic_and v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x20,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_and v[2:3], a2 offset:4095 // GFX90A: flat_atomic_or v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x24,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_or v[2:3], a2 offset:4095 // GFX90A: flat_atomic_xor v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x28,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_xor v[2:3], a2 offset:4095 // GFX90A: flat_atomic_inc v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_inc v[2:3], a2 offset:4095 // GFX90A: flat_atomic_dec v[2:3], a2 offset:4095 ; encoding: [0xff,0x0f,0x30,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_dec v[2:3], a2 offset:4095 // GFX90A: flat_atomic_swap_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x80,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_swap_x2 v[2:3], a[2:3] offset:4095 // GFX90A: flat_atomic_cmpswap_x2 v[2:3], a[2:5] offset:4095 ; encoding: [0xff,0x0f,0x84,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_cmpswap_x2 v[2:3], a[2:5] offset:4095 // GFX90A: flat_atomic_add_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x88,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_add_x2 v[2:3], a[2:3] offset:4095 // GFX90A: flat_atomic_sub_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x8c,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_sub_x2 v[2:3], a[2:3] offset:4095 // GFX90A: flat_atomic_smin_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x90,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_smin_x2 v[2:3], a[2:3] offset:4095 // GFX90A: flat_atomic_umin_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x94,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_umin_x2 v[2:3], a[2:3] offset:4095 // GFX90A: flat_atomic_smax_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x98,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_smax_x2 v[2:3], a[2:3] offset:4095 // GFX90A: flat_atomic_umax_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0x9c,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_umax_x2 v[2:3], a[2:3] offset:4095 // GFX90A: flat_atomic_and_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0xa0,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_and_x2 v[2:3], a[2:3] offset:4095 // GFX90A: flat_atomic_or_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0xa4,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_or_x2 v[2:3], a[2:3] offset:4095 // GFX90A: flat_atomic_xor_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0xa8,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_xor_x2 v[2:3], a[2:3] offset:4095 // GFX90A: flat_atomic_inc_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0xac,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_inc_x2 v[2:3], a[2:3] offset:4095 // GFX90A: flat_atomic_dec_x2 v[2:3], a[2:3] offset:4095 ; encoding: [0xff,0x0f,0xb0,0xdd,0x02,0x02,0x80,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction flat_atomic_dec_x2 v[2:3], a[2:3] offset:4095 // GFX90A: global_load_ubyte a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x40,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_ubyte a5, v[2:3], off offset:-1 // GFX90A: global_load_ubyte a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x40,0xdc,0x02,0x00,0xff,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_ubyte a255, v[2:3], off offset:-1 // GFX90A: global_load_ubyte a5, v[2:3], off ; encoding: [0x00,0x80,0x40,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_ubyte a5, v[2:3], off // GFX90A: global_load_sbyte a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x44,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_sbyte a5, v[2:3], off offset:-1 // GFX90A: global_load_sbyte a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x44,0xdc,0x02,0x00,0xff,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_sbyte a255, v[2:3], off offset:-1 // GFX90A: global_load_sbyte a5, v[2:3], off ; encoding: [0x00,0x80,0x44,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_sbyte a5, v[2:3], off // GFX90A: global_load_ushort a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x48,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_ushort a5, v[2:3], off offset:-1 // GFX90A: global_load_ushort a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x48,0xdc,0x02,0x00,0xff,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_ushort a255, v[2:3], off offset:-1 // GFX90A: global_load_ushort a5, v[2:3], off ; encoding: [0x00,0x80,0x48,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_ushort a5, v[2:3], off // GFX90A: global_load_sshort a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x4c,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_sshort a5, v[2:3], off offset:-1 // GFX90A: global_load_sshort a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x4c,0xdc,0x02,0x00,0xff,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_sshort a255, v[2:3], off offset:-1 // GFX90A: global_load_sshort a5, v[2:3], off ; encoding: [0x00,0x80,0x4c,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_sshort a5, v[2:3], off // GFX90A: global_load_dword a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dword a5, v[2:3], off offset:-1 // GFX90A: global_load_dword a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x50,0xdc,0x02,0x00,0xff,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dword a255, v[2:3], off offset:-1 // GFX90A: global_load_dword a5, v[2:3], off ; encoding: [0x00,0x80,0x50,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dword a5, v[2:3], off // GFX90A: global_load_dwordx2 a[6:7], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x54,0xdc,0x02,0x00,0xff,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx2 a[6:7], v[2:3], off offset:-1 // GFX90A: global_load_dwordx2 a[254:255], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x54,0xdc,0x02,0x00,0xff,0xfe] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx2 a[254:255], v[2:3], off offset:-1 // GFX90A: global_load_dwordx2 a[6:7], v[2:3], off ; encoding: [0x00,0x80,0x54,0xdc,0x02,0x00,0xff,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx2 a[6:7], v[2:3], off // GFX90A: global_load_dwordx3 a[6:8], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x58,0xdc,0x02,0x00,0xff,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx3 a[6:8], v[2:3], off offset:-1 // GFX90A: global_load_dwordx3 a[252:254], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x58,0xdc,0x02,0x00,0xff,0xfc] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx3 a[252:254], v[2:3], off offset:-1 // GFX90A: global_load_dwordx3 a[6:8], v[2:3], off ; encoding: [0x00,0x80,0x58,0xdc,0x02,0x00,0xff,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx3 a[6:8], v[2:3], off // GFX90A: global_load_dwordx4 a[6:9], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x5c,0xdc,0x02,0x00,0xff,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx4 a[6:9], v[2:3], off offset:-1 // GFX90A: global_load_dwordx4 a[252:255], v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x5c,0xdc,0x02,0x00,0xff,0xfc] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx4 a[252:255], v[2:3], off offset:-1 // GFX90A: global_load_dwordx4 a[6:9], v[2:3], off ; encoding: [0x00,0x80,0x5c,0xdc,0x02,0x00,0xff,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx4 a[6:9], v[2:3], off // GFX90A: global_store_byte v[2:3], a2, off offset:-1 ; encoding: [0xff,0x9f,0x60,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_byte v[2:3], a2, off offset:-1 // GFX90A: global_store_byte v[2:3], a255, off offset:-1 ; encoding: [0xff,0x9f,0x60,0xdc,0x02,0xff,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_byte v[2:3], a255, off offset:-1 // GFX90A: global_store_byte v[2:3], a2, off ; encoding: [0x00,0x80,0x60,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_byte v[2:3], a2, off // GFX90A: global_store_byte_d16_hi v[2:3], a2, off offset:-1 ; encoding: [0xff,0x9f,0x64,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_byte_d16_hi v[2:3], a2, off offset:-1 // GFX90A: global_store_byte_d16_hi v[2:3], a255, off offset:-1 ; encoding: [0xff,0x9f,0x64,0xdc,0x02,0xff,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_byte_d16_hi v[2:3], a255, off offset:-1 // GFX90A: global_store_byte_d16_hi v[2:3], a2, off ; encoding: [0x00,0x80,0x64,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_byte_d16_hi v[2:3], a2, off // GFX90A: global_store_short v[2:3], a2, off offset:-1 ; encoding: [0xff,0x9f,0x68,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_short v[2:3], a2, off offset:-1 // GFX90A: global_store_short v[2:3], a255, off offset:-1 ; encoding: [0xff,0x9f,0x68,0xdc,0x02,0xff,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_short v[2:3], a255, off offset:-1 // GFX90A: global_store_short v[2:3], a2, off ; encoding: [0x00,0x80,0x68,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_short v[2:3], a2, off // GFX90A: global_store_short_d16_hi v[2:3], a2, off offset:-1 ; encoding: [0xff,0x9f,0x6c,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_short_d16_hi v[2:3], a2, off offset:-1 // GFX90A: global_store_short_d16_hi v[2:3], a255, off offset:-1 ; encoding: [0xff,0x9f,0x6c,0xdc,0x02,0xff,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_short_d16_hi v[2:3], a255, off offset:-1 // GFX90A: global_store_short_d16_hi v[2:3], a2, off ; encoding: [0x00,0x80,0x6c,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_short_d16_hi v[2:3], a2, off // GFX90A: global_store_dword v[2:3], a2, off offset:-1 ; encoding: [0xff,0x9f,0x70,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_dword v[2:3], a2, off offset:-1 // GFX90A: global_store_dword v[2:3], a255, off offset:-1 ; encoding: [0xff,0x9f,0x70,0xdc,0x02,0xff,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_dword v[2:3], a255, off offset:-1 // GFX90A: global_store_dword v[2:3], a2, off ; encoding: [0x00,0x80,0x70,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_dword v[2:3], a2, off // GFX90A: global_store_dwordx2 v[2:3], a[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x74,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_dwordx2 v[2:3], a[2:3], off offset:-1 // GFX90A: global_store_dwordx2 v[2:3], a[254:255], off offset:-1 ; encoding: [0xff,0x9f,0x74,0xdc,0x02,0xfe,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_dwordx2 v[2:3], a[254:255], off offset:-1 // GFX90A: global_store_dwordx2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x74,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_dwordx2 v[2:3], a[2:3], off // GFX90A: global_store_dwordx3 v[2:3], a[2:4], off offset:-1 ; encoding: [0xff,0x9f,0x78,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_dwordx3 v[2:3], a[2:4], off offset:-1 // GFX90A: global_store_dwordx3 v[2:3], a[252:254], off offset:-1 ; encoding: [0xff,0x9f,0x78,0xdc,0x02,0xfc,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_dwordx3 v[2:3], a[252:254], off offset:-1 // GFX90A: global_store_dwordx3 v[2:3], a[2:4], off ; encoding: [0x00,0x80,0x78,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_dwordx3 v[2:3], a[2:4], off // GFX90A: global_store_dwordx4 v[2:3], a[2:5], off offset:-1 ; encoding: [0xff,0x9f,0x7c,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_dwordx4 v[2:3], a[2:5], off offset:-1 // GFX90A: global_store_dwordx4 v[2:3], a[252:255], off offset:-1 ; encoding: [0xff,0x9f,0x7c,0xdc,0x02,0xfc,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_dwordx4 v[2:3], a[252:255], off offset:-1 // GFX90A: global_store_dwordx4 v[2:3], a[2:5], off ; encoding: [0x00,0x80,0x7c,0xdc,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_store_dwordx4 v[2:3], a[2:5], off // GFX90A: global_load_ubyte_d16 a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x80,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_ubyte_d16 a5, v[2:3], off offset:-1 // GFX90A: global_load_ubyte_d16 a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x80,0xdc,0x02,0x00,0xff,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_ubyte_d16 a255, v[2:3], off offset:-1 // GFX90A: global_load_ubyte_d16 a5, v[2:3], off ; encoding: [0x00,0x80,0x80,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_ubyte_d16 a5, v[2:3], off // GFX90A: global_load_ubyte_d16_hi a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x84,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_ubyte_d16_hi a5, v[2:3], off offset:-1 // GFX90A: global_load_ubyte_d16_hi a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x84,0xdc,0x02,0x00,0xff,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_ubyte_d16_hi a255, v[2:3], off offset:-1 // GFX90A: global_load_ubyte_d16_hi a5, v[2:3], off ; encoding: [0x00,0x80,0x84,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_ubyte_d16_hi a5, v[2:3], off // GFX90A: global_load_sbyte_d16 a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x88,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_sbyte_d16 a5, v[2:3], off offset:-1 // GFX90A: global_load_sbyte_d16 a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x88,0xdc,0x02,0x00,0xff,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_sbyte_d16 a255, v[2:3], off offset:-1 // GFX90A: global_load_sbyte_d16 a5, v[2:3], off ; encoding: [0x00,0x80,0x88,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_sbyte_d16 a5, v[2:3], off // GFX90A: global_load_sbyte_d16_hi a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x8c,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_sbyte_d16_hi a5, v[2:3], off offset:-1 // GFX90A: global_load_sbyte_d16_hi a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x8c,0xdc,0x02,0x00,0xff,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_sbyte_d16_hi a255, v[2:3], off offset:-1 // GFX90A: global_load_sbyte_d16_hi a5, v[2:3], off ; encoding: [0x00,0x80,0x8c,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_sbyte_d16_hi a5, v[2:3], off // GFX90A: global_load_short_d16 a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x90,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_short_d16 a5, v[2:3], off offset:-1 // GFX90A: global_load_short_d16 a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x90,0xdc,0x02,0x00,0xff,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_short_d16 a255, v[2:3], off offset:-1 // GFX90A: global_load_short_d16 a5, v[2:3], off ; encoding: [0x00,0x80,0x90,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_short_d16 a5, v[2:3], off // GFX90A: global_load_short_d16_hi a5, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x94,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_short_d16_hi a5, v[2:3], off offset:-1 // GFX90A: global_load_short_d16_hi a255, v[2:3], off offset:-1 ; encoding: [0xff,0x9f,0x94,0xdc,0x02,0x00,0xff,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_short_d16_hi a255, v[2:3], off offset:-1 // GFX90A: global_load_short_d16_hi a5, v[2:3], off ; encoding: [0x00,0x80,0x94,0xdc,0x02,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_short_d16_hi a5, v[2:3], off // GFX90A: global_atomic_swap a1, v[2:3], a2, off glc ; encoding: [0x00,0x80,0x01,0xdd,0x02,0x02,0xff,0x01] @@ -1282,5815 +1282,5815 @@ global_atomic_inc_x2 a[2:3], v[2:3], a[2:3], off glc global_atomic_dec_x2 a[2:3], v[2:3], a[2:3], off glc // GFX90A: global_atomic_swap v[2:3], a2, off ; encoding: [0x00,0x80,0x00,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_swap v[2:3], a2, off // GFX90A: global_atomic_cmpswap v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x04,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_cmpswap v[2:3], a[2:3], off // GFX90A: global_atomic_add v[2:3], a2, off ; encoding: [0x00,0x80,0x08,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_add v[2:3], a2, off // GFX90A: global_atomic_sub v[2:3], a2, off ; encoding: [0x00,0x80,0x0c,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_sub v[2:3], a2, off // GFX90A: global_atomic_smin v[2:3], a2, off ; encoding: [0x00,0x80,0x10,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_smin v[2:3], a2, off // GFX90A: global_atomic_umin v[2:3], a2, off ; encoding: [0x00,0x80,0x14,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_umin v[2:3], a2, off // GFX90A: global_atomic_smax v[2:3], a2, off ; encoding: [0x00,0x80,0x18,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_smax v[2:3], a2, off // GFX90A: global_atomic_umax v[2:3], a2, off ; encoding: [0x00,0x80,0x1c,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_umax v[2:3], a2, off // GFX90A: global_atomic_and v[2:3], a2, off ; encoding: [0x00,0x80,0x20,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_and v[2:3], a2, off // GFX90A: global_atomic_or v[2:3], a2, off ; encoding: [0x00,0x80,0x24,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_or v[2:3], a2, off // GFX90A: global_atomic_xor v[2:3], a2, off ; encoding: [0x00,0x80,0x28,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_xor v[2:3], a2, off // GFX90A: global_atomic_inc v[2:3], a2, off ; encoding: [0x00,0x80,0x2c,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_inc v[2:3], a2, off // GFX90A: global_atomic_dec v[2:3], a2, off ; encoding: [0x00,0x80,0x30,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_dec v[2:3], a2, off // GFX90A: global_atomic_swap_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x80,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_swap_x2 v[2:3], a[2:3], off // GFX90A: global_atomic_cmpswap_x2 v[2:3], a[2:5], off ; encoding: [0x00,0x80,0x84,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_cmpswap_x2 v[2:3], a[2:5], off // GFX90A: global_atomic_add_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x88,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_add_x2 v[2:3], a[2:3], off // GFX90A: global_atomic_sub_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x8c,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_sub_x2 v[2:3], a[2:3], off // GFX90A: global_atomic_smin_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x90,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_smin_x2 v[2:3], a[2:3], off // GFX90A: global_atomic_umin_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x94,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_umin_x2 v[2:3], a[2:3], off // GFX90A: global_atomic_smax_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x98,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_smax_x2 v[2:3], a[2:3], off // GFX90A: global_atomic_umax_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0x9c,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_umax_x2 v[2:3], a[2:3], off // GFX90A: global_atomic_and_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0xa0,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_and_x2 v[2:3], a[2:3], off // GFX90A: global_atomic_or_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0xa4,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_or_x2 v[2:3], a[2:3], off // GFX90A: global_atomic_xor_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0xa8,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_xor_x2 v[2:3], a[2:3], off // GFX90A: global_atomic_inc_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0xac,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_inc_x2 v[2:3], a[2:3], off // GFX90A: global_atomic_dec_x2 v[2:3], a[2:3], off ; encoding: [0x00,0x80,0xb0,0xdd,0x02,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction global_atomic_dec_x2 v[2:3], a[2:3], off // GFX90A: scratch_load_ubyte a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte a5, off, s2 offset:-1 // GFX90A: scratch_load_ubyte a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0x82,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte a255, off, s2 offset:-1 // GFX90A: scratch_load_ubyte a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0xe5,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte a5, off, s101 offset:-1 // GFX90A: scratch_load_ubyte a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0xe6,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte a5, off, flat_scratch_lo offset:-1 // GFX90A: scratch_load_ubyte a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0xe7,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte a5, off, flat_scratch_hi offset:-1 // GFX90A: scratch_load_ubyte a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0xea,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte a5, off, vcc_lo offset:-1 // GFX90A: scratch_load_ubyte a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0xeb,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte a5, off, vcc_hi offset:-1 // GFX90A: scratch_load_ubyte a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x40,0xdc,0x00,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte a5, v0, off offset:-1 // GFX90A: scratch_load_ubyte a5, off, s2 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte a5, off, s2 // GFX90A: scratch_load_ubyte a5, off, s2 ; encoding: [0x00,0x40,0x40,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte a5, off, s2 // GFX90A: scratch_load_ubyte a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x40,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte a5, off, s2 offset:4095 // GFX90A: scratch_load_ubyte a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x40,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte a5, off, s2 offset:-4096 // GFX90A: scratch_load_ubyte a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x41,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte a5, off, s2 offset:-1 glc // GFX90A: scratch_load_ubyte a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x42,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte a5, off, s2 offset:-1 slc // GFX90A: scratch_load_sbyte a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte a5, off, s2 offset:-1 // GFX90A: scratch_load_sbyte a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0x82,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte a255, off, s2 offset:-1 // GFX90A: scratch_load_sbyte a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0xe5,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte a5, off, s101 offset:-1 // GFX90A: scratch_load_sbyte a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0xe6,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte a5, off, flat_scratch_lo offset:-1 // GFX90A: scratch_load_sbyte a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0xe7,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte a5, off, flat_scratch_hi offset:-1 // GFX90A: scratch_load_sbyte a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0xea,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte a5, off, vcc_lo offset:-1 // GFX90A: scratch_load_sbyte a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0xeb,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte a5, off, vcc_hi offset:-1 // GFX90A: scratch_load_sbyte a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x44,0xdc,0x00,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte a5, v0, off offset:-1 // GFX90A: scratch_load_sbyte a5, off, s2 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte a5, off, s2 // GFX90A: scratch_load_sbyte a5, off, s2 ; encoding: [0x00,0x40,0x44,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte a5, off, s2 // GFX90A: scratch_load_sbyte a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x44,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte a5, off, s2 offset:4095 // GFX90A: scratch_load_sbyte a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x44,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte a5, off, s2 offset:-4096 // GFX90A: scratch_load_sbyte a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x45,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte a5, off, s2 offset:-1 glc // GFX90A: scratch_load_sbyte a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x46,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte a5, off, s2 offset:-1 slc // GFX90A: scratch_load_ushort a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ushort a5, off, s2 offset:-1 // GFX90A: scratch_load_ushort a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0x82,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ushort a255, off, s2 offset:-1 // GFX90A: scratch_load_ushort a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0xe5,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ushort a5, off, s101 offset:-1 // GFX90A: scratch_load_ushort a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0xe6,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ushort a5, off, flat_scratch_lo offset:-1 // GFX90A: scratch_load_ushort a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0xe7,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ushort a5, off, flat_scratch_hi offset:-1 // GFX90A: scratch_load_ushort a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0xea,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ushort a5, off, vcc_lo offset:-1 // GFX90A: scratch_load_ushort a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0xeb,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ushort a5, off, vcc_hi offset:-1 // GFX90A: scratch_load_ushort a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x48,0xdc,0x00,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ushort a5, v0, off offset:-1 // GFX90A: scratch_load_ushort a5, off, s2 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ushort a5, off, s2 // GFX90A: scratch_load_ushort a5, off, s2 ; encoding: [0x00,0x40,0x48,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ushort a5, off, s2 // GFX90A: scratch_load_ushort a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x48,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ushort a5, off, s2 offset:4095 // GFX90A: scratch_load_ushort a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x48,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ushort a5, off, s2 offset:-4096 // GFX90A: scratch_load_ushort a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x49,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ushort a5, off, s2 offset:-1 glc // GFX90A: scratch_load_ushort a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4a,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ushort a5, off, s2 offset:-1 slc // GFX90A: scratch_load_sshort a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sshort a5, off, s2 offset:-1 // GFX90A: scratch_load_sshort a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0x82,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sshort a255, off, s2 offset:-1 // GFX90A: scratch_load_sshort a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0xe5,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sshort a5, off, s101 offset:-1 // GFX90A: scratch_load_sshort a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0xe6,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sshort a5, off, flat_scratch_lo offset:-1 // GFX90A: scratch_load_sshort a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0xe7,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sshort a5, off, flat_scratch_hi offset:-1 // GFX90A: scratch_load_sshort a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0xea,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sshort a5, off, vcc_lo offset:-1 // GFX90A: scratch_load_sshort a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0xeb,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sshort a5, off, vcc_hi offset:-1 // GFX90A: scratch_load_sshort a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x4c,0xdc,0x00,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sshort a5, v0, off offset:-1 // GFX90A: scratch_load_sshort a5, off, s2 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sshort a5, off, s2 // GFX90A: scratch_load_sshort a5, off, s2 ; encoding: [0x00,0x40,0x4c,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sshort a5, off, s2 // GFX90A: scratch_load_sshort a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x4c,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sshort a5, off, s2 offset:4095 // GFX90A: scratch_load_sshort a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x4c,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sshort a5, off, s2 offset:-4096 // GFX90A: scratch_load_sshort a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x4d,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sshort a5, off, s2 offset:-1 glc // GFX90A: scratch_load_sshort a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x4e,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sshort a5, off, s2 offset:-1 slc // GFX90A: scratch_load_dword a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dword a5, off, s2 offset:-1 // GFX90A: scratch_load_dword a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0x82,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dword a255, off, s2 offset:-1 // GFX90A: scratch_load_dword a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0xe5,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dword a5, off, s101 offset:-1 // GFX90A: scratch_load_dword a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0xe6,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dword a5, off, flat_scratch_lo offset:-1 // GFX90A: scratch_load_dword a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0xe7,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dword a5, off, flat_scratch_hi offset:-1 // GFX90A: scratch_load_dword a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0xea,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dword a5, off, vcc_lo offset:-1 // GFX90A: scratch_load_dword a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0xeb,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dword a5, off, vcc_hi offset:-1 // GFX90A: scratch_load_dword a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x50,0xdc,0x00,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dword a5, v0, off offset:-1 // GFX90A: scratch_load_dword a5, off, s2 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dword a5, off, s2 // GFX90A: scratch_load_dword a5, off, s2 ; encoding: [0x00,0x40,0x50,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dword a5, off, s2 // GFX90A: scratch_load_dword a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x50,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dword a5, off, s2 offset:4095 // GFX90A: scratch_load_dword a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x50,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dword a5, off, s2 offset:-4096 // GFX90A: scratch_load_dword a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x51,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dword a5, off, s2 offset:-1 glc // GFX90A: scratch_load_dword a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x52,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dword a5, off, s2 offset:-1 slc // GFX90A: scratch_load_dwordx2 a[6:7], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx2 a[6:7], off, s2 offset:-1 // GFX90A: scratch_load_dwordx2 a[254:255], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0x82,0xfe] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx2 a[254:255], off, s2 offset:-1 // GFX90A: scratch_load_dwordx2 a[6:7], off, s101 offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0xe5,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx2 a[6:7], off, s101 offset:-1 // GFX90A: scratch_load_dwordx2 a[6:7], off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0xe6,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx2 a[6:7], off, flat_scratch_lo offset:-1 // GFX90A: scratch_load_dwordx2 a[6:7], off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0xe7,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx2 a[6:7], off, flat_scratch_hi offset:-1 // GFX90A: scratch_load_dwordx2 a[6:7], off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0xea,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx2 a[6:7], off, vcc_lo offset:-1 // GFX90A: scratch_load_dwordx2 a[6:7], off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0xeb,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx2 a[6:7], off, vcc_hi offset:-1 // GFX90A: scratch_load_dwordx2 a[6:7], v0, off offset:-1 ; encoding: [0xff,0x5f,0x54,0xdc,0x00,0x00,0xff,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx2 a[6:7], v0, off offset:-1 // GFX90A: scratch_load_dwordx2 a[6:7], off, s2 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx2 a[6:7], off, s2 // GFX90A: scratch_load_dwordx2 a[6:7], off, s2 ; encoding: [0x00,0x40,0x54,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx2 a[6:7], off, s2 // GFX90A: scratch_load_dwordx2 a[6:7], off, s2 offset:4095 ; encoding: [0xff,0x4f,0x54,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx2 a[6:7], off, s2 offset:4095 // GFX90A: scratch_load_dwordx2 a[6:7], off, s2 offset:-4096 ; encoding: [0x00,0x50,0x54,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx2 a[6:7], off, s2 offset:-4096 // GFX90A: scratch_load_dwordx2 a[6:7], off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x55,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx2 a[6:7], off, s2 offset:-1 glc // GFX90A: scratch_load_dwordx2 a[6:7], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x56,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx2 a[6:7], off, s2 offset:-1 slc // GFX90A: scratch_load_dwordx3 a[6:8], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx3 a[6:8], off, s2 offset:-1 // GFX90A: scratch_load_dwordx3 a[252:254], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0x82,0xfc] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx3 a[252:254], off, s2 offset:-1 // GFX90A: scratch_load_dwordx3 a[6:8], off, s101 offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0xe5,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx3 a[6:8], off, s101 offset:-1 // GFX90A: scratch_load_dwordx3 a[6:8], off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0xe6,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx3 a[6:8], off, flat_scratch_lo offset:-1 // GFX90A: scratch_load_dwordx3 a[6:8], off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0xe7,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx3 a[6:8], off, flat_scratch_hi offset:-1 // GFX90A: scratch_load_dwordx3 a[6:8], off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0xea,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx3 a[6:8], off, vcc_lo offset:-1 // GFX90A: scratch_load_dwordx3 a[6:8], off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0xeb,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx3 a[6:8], off, vcc_hi offset:-1 // GFX90A: scratch_load_dwordx3 a[6:8], v0, off offset:-1 ; encoding: [0xff,0x5f,0x58,0xdc,0x00,0x00,0xff,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx3 a[6:8], v0, off offset:-1 // GFX90A: scratch_load_dwordx3 a[6:8], off, s2 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx3 a[6:8], off, s2 // GFX90A: scratch_load_dwordx3 a[6:8], off, s2 ; encoding: [0x00,0x40,0x58,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx3 a[6:8], off, s2 // GFX90A: scratch_load_dwordx3 a[6:8], off, s2 offset:4095 ; encoding: [0xff,0x4f,0x58,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx3 a[6:8], off, s2 offset:4095 // GFX90A: scratch_load_dwordx3 a[6:8], off, s2 offset:-4096 ; encoding: [0x00,0x50,0x58,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx3 a[6:8], off, s2 offset:-4096 // GFX90A: scratch_load_dwordx3 a[6:8], off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x59,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx3 a[6:8], off, s2 offset:-1 glc // GFX90A: scratch_load_dwordx3 a[6:8], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5a,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx3 a[6:8], off, s2 offset:-1 slc // GFX90A: scratch_load_dwordx4 a[6:9], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx4 a[6:9], off, s2 offset:-1 // GFX90A: scratch_load_dwordx4 a[252:255], off, s2 offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0x82,0xfc] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx4 a[252:255], off, s2 offset:-1 // GFX90A: scratch_load_dwordx4 a[6:9], off, s101 offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0xe5,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx4 a[6:9], off, s101 offset:-1 // GFX90A: scratch_load_dwordx4 a[6:9], off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0xe6,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx4 a[6:9], off, flat_scratch_lo offset:-1 // GFX90A: scratch_load_dwordx4 a[6:9], off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0xe7,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx4 a[6:9], off, flat_scratch_hi offset:-1 // GFX90A: scratch_load_dwordx4 a[6:9], off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0xea,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx4 a[6:9], off, vcc_lo offset:-1 // GFX90A: scratch_load_dwordx4 a[6:9], off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0xeb,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx4 a[6:9], off, vcc_hi offset:-1 // GFX90A: scratch_load_dwordx4 a[6:9], v0, off offset:-1 ; encoding: [0xff,0x5f,0x5c,0xdc,0x00,0x00,0xff,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx4 a[6:9], v0, off offset:-1 // GFX90A: scratch_load_dwordx4 a[6:9], off, s2 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx4 a[6:9], off, s2 // GFX90A: scratch_load_dwordx4 a[6:9], off, s2 ; encoding: [0x00,0x40,0x5c,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx4 a[6:9], off, s2 // GFX90A: scratch_load_dwordx4 a[6:9], off, s2 offset:4095 ; encoding: [0xff,0x4f,0x5c,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx4 a[6:9], off, s2 offset:4095 // GFX90A: scratch_load_dwordx4 a[6:9], off, s2 offset:-4096 ; encoding: [0x00,0x50,0x5c,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx4 a[6:9], off, s2 offset:-4096 // GFX90A: scratch_load_dwordx4 a[6:9], off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x5d,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx4 a[6:9], off, s2 offset:-1 glc // GFX90A: scratch_load_dwordx4 a[6:9], off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x5e,0xdc,0x00,0x00,0x82,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_dwordx4 a[6:9], off, s2 offset:-1 slc // GFX90A: scratch_store_byte off, a2, s3 offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte off, a2, s3 offset:-1 // GFX90A: scratch_store_byte off, a255, s3 offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0xff,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte off, a255, s3 offset:-1 // GFX90A: scratch_store_byte off, a2, s101 offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0xe5,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte off, a2, s101 offset:-1 // GFX90A: scratch_store_byte off, a2, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0xe6,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte off, a2, flat_scratch_lo offset:-1 // GFX90A: scratch_store_byte off, a2, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0xe7,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte off, a2, flat_scratch_hi offset:-1 // GFX90A: scratch_store_byte off, a2, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0xea,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte off, a2, vcc_lo offset:-1 // GFX90A: scratch_store_byte off, a2, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0xeb,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte off, a2, vcc_hi offset:-1 // GFX90A: scratch_store_byte v0, a2, off offset:-1 ; encoding: [0xff,0x5f,0x60,0xdc,0x00,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte v0, a2, off offset:-1 // GFX90A: scratch_store_byte off, a2, s3 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte off, a2, s3 // GFX90A: scratch_store_byte off, a2, s3 ; encoding: [0x00,0x40,0x60,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte off, a2, s3 // GFX90A: scratch_store_byte off, a2, s3 offset:4095 ; encoding: [0xff,0x4f,0x60,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte off, a2, s3 offset:4095 // GFX90A: scratch_store_byte off, a2, s3 offset:-4096 ; encoding: [0x00,0x50,0x60,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte off, a2, s3 offset:-4096 // GFX90A: scratch_store_byte off, a2, s3 offset:-1 glc ; encoding: [0xff,0x5f,0x61,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte off, a2, s3 offset:-1 glc // GFX90A: scratch_store_byte off, a2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x62,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte off, a2, s3 offset:-1 slc // GFX90A: scratch_store_byte_d16_hi off, a2, s3 offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte_d16_hi off, a2, s3 offset:-1 // GFX90A: scratch_store_byte_d16_hi off, a255, s3 offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0xff,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte_d16_hi off, a255, s3 offset:-1 // GFX90A: scratch_store_byte_d16_hi off, a2, s101 offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0xe5,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte_d16_hi off, a2, s101 offset:-1 // GFX90A: scratch_store_byte_d16_hi off, a2, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0xe6,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte_d16_hi off, a2, flat_scratch_lo offset:-1 // GFX90A: scratch_store_byte_d16_hi off, a2, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0xe7,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte_d16_hi off, a2, flat_scratch_hi offset:-1 // GFX90A: scratch_store_byte_d16_hi off, a2, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0xea,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte_d16_hi off, a2, vcc_lo offset:-1 // GFX90A: scratch_store_byte_d16_hi off, a2, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0xeb,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte_d16_hi off, a2, vcc_hi offset:-1 // GFX90A: scratch_store_byte_d16_hi v0, a2, off offset:-1 ; encoding: [0xff,0x5f,0x64,0xdc,0x00,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte_d16_hi v0, a2, off offset:-1 // GFX90A: scratch_store_byte_d16_hi off, a2, s3 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte_d16_hi off, a2, s3 // GFX90A: scratch_store_byte_d16_hi off, a2, s3 ; encoding: [0x00,0x40,0x64,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte_d16_hi off, a2, s3 // GFX90A: scratch_store_byte_d16_hi off, a2, s3 offset:4095 ; encoding: [0xff,0x4f,0x64,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte_d16_hi off, a2, s3 offset:4095 // GFX90A: scratch_store_byte_d16_hi off, a2, s3 offset:-4096 ; encoding: [0x00,0x50,0x64,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte_d16_hi off, a2, s3 offset:-4096 // GFX90A: scratch_store_byte_d16_hi off, a2, s3 offset:-1 glc ; encoding: [0xff,0x5f,0x65,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte_d16_hi off, a2, s3 offset:-1 glc // GFX90A: scratch_store_byte_d16_hi off, a2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x66,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_byte_d16_hi off, a2, s3 offset:-1 slc // GFX90A: scratch_store_short off, a2, s3 offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short off, a2, s3 offset:-1 // GFX90A: scratch_store_short off, a255, s3 offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0xff,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short off, a255, s3 offset:-1 // GFX90A: scratch_store_short off, a2, s101 offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0xe5,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short off, a2, s101 offset:-1 // GFX90A: scratch_store_short off, a2, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0xe6,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short off, a2, flat_scratch_lo offset:-1 // GFX90A: scratch_store_short off, a2, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0xe7,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short off, a2, flat_scratch_hi offset:-1 // GFX90A: scratch_store_short off, a2, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0xea,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short off, a2, vcc_lo offset:-1 // GFX90A: scratch_store_short off, a2, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0xeb,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short off, a2, vcc_hi offset:-1 // GFX90A: scratch_store_short v0, a2, off offset:-1 ; encoding: [0xff,0x5f,0x68,0xdc,0x00,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short v0, a2, off offset:-1 // GFX90A: scratch_store_short off, a2, s3 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short off, a2, s3 // GFX90A: scratch_store_short off, a2, s3 ; encoding: [0x00,0x40,0x68,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short off, a2, s3 // GFX90A: scratch_store_short off, a2, s3 offset:4095 ; encoding: [0xff,0x4f,0x68,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short off, a2, s3 offset:4095 // GFX90A: scratch_store_short off, a2, s3 offset:-4096 ; encoding: [0x00,0x50,0x68,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short off, a2, s3 offset:-4096 // GFX90A: scratch_store_short off, a2, s3 offset:-1 glc ; encoding: [0xff,0x5f,0x69,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short off, a2, s3 offset:-1 glc // GFX90A: scratch_store_short off, a2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6a,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short off, a2, s3 offset:-1 slc // GFX90A: scratch_store_short_d16_hi off, a2, s3 offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short_d16_hi off, a2, s3 offset:-1 // GFX90A: scratch_store_short_d16_hi off, a255, s3 offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0xff,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short_d16_hi off, a255, s3 offset:-1 // GFX90A: scratch_store_short_d16_hi off, a2, s101 offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0xe5,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short_d16_hi off, a2, s101 offset:-1 // GFX90A: scratch_store_short_d16_hi off, a2, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0xe6,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short_d16_hi off, a2, flat_scratch_lo offset:-1 // GFX90A: scratch_store_short_d16_hi off, a2, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0xe7,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short_d16_hi off, a2, flat_scratch_hi offset:-1 // GFX90A: scratch_store_short_d16_hi off, a2, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0xea,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short_d16_hi off, a2, vcc_lo offset:-1 // GFX90A: scratch_store_short_d16_hi off, a2, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0xeb,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short_d16_hi off, a2, vcc_hi offset:-1 // GFX90A: scratch_store_short_d16_hi v0, a2, off offset:-1 ; encoding: [0xff,0x5f,0x6c,0xdc,0x00,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short_d16_hi v0, a2, off offset:-1 // GFX90A: scratch_store_short_d16_hi off, a2, s3 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short_d16_hi off, a2, s3 // GFX90A: scratch_store_short_d16_hi off, a2, s3 ; encoding: [0x00,0x40,0x6c,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short_d16_hi off, a2, s3 // GFX90A: scratch_store_short_d16_hi off, a2, s3 offset:4095 ; encoding: [0xff,0x4f,0x6c,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short_d16_hi off, a2, s3 offset:4095 // GFX90A: scratch_store_short_d16_hi off, a2, s3 offset:-4096 ; encoding: [0x00,0x50,0x6c,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short_d16_hi off, a2, s3 offset:-4096 // GFX90A: scratch_store_short_d16_hi off, a2, s3 offset:-1 glc ; encoding: [0xff,0x5f,0x6d,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short_d16_hi off, a2, s3 offset:-1 glc // GFX90A: scratch_store_short_d16_hi off, a2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x6e,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_short_d16_hi off, a2, s3 offset:-1 slc // GFX90A: scratch_store_dword off, a2, s3 offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dword off, a2, s3 offset:-1 // GFX90A: scratch_store_dword off, a255, s3 offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0xff,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dword off, a255, s3 offset:-1 // GFX90A: scratch_store_dword off, a2, s101 offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0xe5,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dword off, a2, s101 offset:-1 // GFX90A: scratch_store_dword off, a2, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0xe6,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dword off, a2, flat_scratch_lo offset:-1 // GFX90A: scratch_store_dword off, a2, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0xe7,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dword off, a2, flat_scratch_hi offset:-1 // GFX90A: scratch_store_dword off, a2, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0xea,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dword off, a2, vcc_lo offset:-1 // GFX90A: scratch_store_dword off, a2, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0xeb,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dword off, a2, vcc_hi offset:-1 // GFX90A: scratch_store_dword v0, a2, off offset:-1 ; encoding: [0xff,0x5f,0x70,0xdc,0x00,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dword v0, a2, off offset:-1 // GFX90A: scratch_store_dword off, a2, s3 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dword off, a2, s3 // GFX90A: scratch_store_dword off, a2, s3 ; encoding: [0x00,0x40,0x70,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dword off, a2, s3 // GFX90A: scratch_store_dword off, a2, s3 offset:4095 ; encoding: [0xff,0x4f,0x70,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dword off, a2, s3 offset:4095 // GFX90A: scratch_store_dword off, a2, s3 offset:-4096 ; encoding: [0x00,0x50,0x70,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dword off, a2, s3 offset:-4096 // GFX90A: scratch_store_dword off, a2, s3 offset:-1 glc ; encoding: [0xff,0x5f,0x71,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dword off, a2, s3 offset:-1 glc // GFX90A: scratch_store_dword off, a2, s3 offset:-1 slc ; encoding: [0xff,0x5f,0x72,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dword off, a2, s3 offset:-1 slc // GFX90A: scratch_store_dwordx2 off, a[2:3], s3 offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx2 off, a[2:3], s3 offset:-1 // GFX90A: scratch_store_dwordx2 off, a[254:255], s3 offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0xfe,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx2 off, a[254:255], s3 offset:-1 // GFX90A: scratch_store_dwordx2 off, a[2:3], s101 offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0xe5,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx2 off, a[2:3], s101 offset:-1 // GFX90A: scratch_store_dwordx2 off, a[2:3], flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0xe6,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx2 off, a[2:3], flat_scratch_lo offset:-1 // GFX90A: scratch_store_dwordx2 off, a[2:3], flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0xe7,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx2 off, a[2:3], flat_scratch_hi offset:-1 // GFX90A: scratch_store_dwordx2 off, a[2:3], vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0xea,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx2 off, a[2:3], vcc_lo offset:-1 // GFX90A: scratch_store_dwordx2 off, a[2:3], vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0xeb,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx2 off, a[2:3], vcc_hi offset:-1 // GFX90A: scratch_store_dwordx2 v0, a[2:3], off offset:-1 ; encoding: [0xff,0x5f,0x74,0xdc,0x00,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx2 v0, a[2:3], off offset:-1 // GFX90A: scratch_store_dwordx2 off, a[2:3], s3 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx2 off, a[2:3], s3 // GFX90A: scratch_store_dwordx2 off, a[2:3], s3 ; encoding: [0x00,0x40,0x74,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx2 off, a[2:3], s3 // GFX90A: scratch_store_dwordx2 off, a[2:3], s3 offset:4095 ; encoding: [0xff,0x4f,0x74,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx2 off, a[2:3], s3 offset:4095 // GFX90A: scratch_store_dwordx2 off, a[2:3], s3 offset:-4096 ; encoding: [0x00,0x50,0x74,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx2 off, a[2:3], s3 offset:-4096 // GFX90A: scratch_store_dwordx2 off, a[2:3], s3 offset:-1 glc ; encoding: [0xff,0x5f,0x75,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx2 off, a[2:3], s3 offset:-1 glc // GFX90A: scratch_store_dwordx2 off, a[2:3], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x76,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx2 off, a[2:3], s3 offset:-1 slc // GFX90A: scratch_store_dwordx3 off, a[2:4], s3 offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx3 off, a[2:4], s3 offset:-1 // GFX90A: scratch_store_dwordx3 off, a[252:254], s3 offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0xfc,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx3 off, a[252:254], s3 offset:-1 // GFX90A: scratch_store_dwordx3 off, a[2:4], s101 offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0xe5,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx3 off, a[2:4], s101 offset:-1 // GFX90A: scratch_store_dwordx3 off, a[2:4], flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0xe6,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx3 off, a[2:4], flat_scratch_lo offset:-1 // GFX90A: scratch_store_dwordx3 off, a[2:4], flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0xe7,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx3 off, a[2:4], flat_scratch_hi offset:-1 // GFX90A: scratch_store_dwordx3 off, a[2:4], vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0xea,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx3 off, a[2:4], vcc_lo offset:-1 // GFX90A: scratch_store_dwordx3 off, a[2:4], vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0xeb,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx3 off, a[2:4], vcc_hi offset:-1 // GFX90A: scratch_store_dwordx3 v0, a[2:4], off offset:-1 ; encoding: [0xff,0x5f,0x78,0xdc,0x00,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx3 v0, a[2:4], off offset:-1 // GFX90A: scratch_store_dwordx3 off, a[2:4], s3 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx3 off, a[2:4], s3 // GFX90A: scratch_store_dwordx3 off, a[2:4], s3 ; encoding: [0x00,0x40,0x78,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx3 off, a[2:4], s3 // GFX90A: scratch_store_dwordx3 off, a[2:4], s3 offset:4095 ; encoding: [0xff,0x4f,0x78,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx3 off, a[2:4], s3 offset:4095 // GFX90A: scratch_store_dwordx3 off, a[2:4], s3 offset:-4096 ; encoding: [0x00,0x50,0x78,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx3 off, a[2:4], s3 offset:-4096 // GFX90A: scratch_store_dwordx3 off, a[2:4], s3 offset:-1 glc ; encoding: [0xff,0x5f,0x79,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx3 off, a[2:4], s3 offset:-1 glc // GFX90A: scratch_store_dwordx3 off, a[2:4], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7a,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx3 off, a[2:4], s3 offset:-1 slc // GFX90A: scratch_store_dwordx4 off, a[2:5], s3 offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx4 off, a[2:5], s3 offset:-1 // GFX90A: scratch_store_dwordx4 off, a[252:255], s3 offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0xfc,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx4 off, a[252:255], s3 offset:-1 // GFX90A: scratch_store_dwordx4 off, a[2:5], s101 offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0xe5,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx4 off, a[2:5], s101 offset:-1 // GFX90A: scratch_store_dwordx4 off, a[2:5], flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0xe6,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx4 off, a[2:5], flat_scratch_lo offset:-1 // GFX90A: scratch_store_dwordx4 off, a[2:5], flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0xe7,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx4 off, a[2:5], flat_scratch_hi offset:-1 // GFX90A: scratch_store_dwordx4 off, a[2:5], vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0xea,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx4 off, a[2:5], vcc_lo offset:-1 // GFX90A: scratch_store_dwordx4 off, a[2:5], vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0xeb,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx4 off, a[2:5], vcc_hi offset:-1 // GFX90A: scratch_store_dwordx4 v0, a[2:5], off offset:-1 ; encoding: [0xff,0x5f,0x7c,0xdc,0x00,0x02,0xff,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx4 v0, a[2:5], off offset:-1 // GFX90A: scratch_store_dwordx4 off, a[2:5], s3 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx4 off, a[2:5], s3 // GFX90A: scratch_store_dwordx4 off, a[2:5], s3 ; encoding: [0x00,0x40,0x7c,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx4 off, a[2:5], s3 // GFX90A: scratch_store_dwordx4 off, a[2:5], s3 offset:4095 ; encoding: [0xff,0x4f,0x7c,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx4 off, a[2:5], s3 offset:4095 // GFX90A: scratch_store_dwordx4 off, a[2:5], s3 offset:-4096 ; encoding: [0x00,0x50,0x7c,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx4 off, a[2:5], s3 offset:-4096 // GFX90A: scratch_store_dwordx4 off, a[2:5], s3 offset:-1 glc ; encoding: [0xff,0x5f,0x7d,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx4 off, a[2:5], s3 offset:-1 glc // GFX90A: scratch_store_dwordx4 off, a[2:5], s3 offset:-1 slc ; encoding: [0xff,0x5f,0x7e,0xdc,0x00,0x02,0x83,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_store_dwordx4 off, a[2:5], s3 offset:-1 slc // GFX90A: scratch_load_ubyte_d16 a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16 a5, off, s2 offset:-1 // GFX90A: scratch_load_ubyte_d16 a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0x82,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16 a255, off, s2 offset:-1 // GFX90A: scratch_load_ubyte_d16 a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0xe5,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16 a5, off, s101 offset:-1 // GFX90A: scratch_load_ubyte_d16 a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0xe6,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16 a5, off, flat_scratch_lo offset:-1 // GFX90A: scratch_load_ubyte_d16 a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0xe7,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16 a5, off, flat_scratch_hi offset:-1 // GFX90A: scratch_load_ubyte_d16 a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0xea,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16 a5, off, vcc_lo offset:-1 // GFX90A: scratch_load_ubyte_d16 a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0xeb,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16 a5, off, vcc_hi offset:-1 // GFX90A: scratch_load_ubyte_d16 a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x80,0xdc,0x00,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16 a5, v0, off offset:-1 // GFX90A: scratch_load_ubyte_d16 a5, off, s2 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16 a5, off, s2 // GFX90A: scratch_load_ubyte_d16 a5, off, s2 ; encoding: [0x00,0x40,0x80,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16 a5, off, s2 // GFX90A: scratch_load_ubyte_d16 a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x80,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16 a5, off, s2 offset:4095 // GFX90A: scratch_load_ubyte_d16 a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x80,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16 a5, off, s2 offset:-4096 // GFX90A: scratch_load_ubyte_d16 a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x81,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16 a5, off, s2 offset:-1 glc // GFX90A: scratch_load_ubyte_d16 a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x82,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16 a5, off, s2 offset:-1 slc // GFX90A: scratch_load_ubyte_d16_hi a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16_hi a5, off, s2 offset:-1 // GFX90A: scratch_load_ubyte_d16_hi a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0x82,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16_hi a255, off, s2 offset:-1 // GFX90A: scratch_load_ubyte_d16_hi a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0xe5,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16_hi a5, off, s101 offset:-1 // GFX90A: scratch_load_ubyte_d16_hi a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0xe6,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16_hi a5, off, flat_scratch_lo offset:-1 // GFX90A: scratch_load_ubyte_d16_hi a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0xe7,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16_hi a5, off, flat_scratch_hi offset:-1 // GFX90A: scratch_load_ubyte_d16_hi a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0xea,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16_hi a5, off, vcc_lo offset:-1 // GFX90A: scratch_load_ubyte_d16_hi a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0xeb,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16_hi a5, off, vcc_hi offset:-1 // GFX90A: scratch_load_ubyte_d16_hi a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x84,0xdc,0x00,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16_hi a5, v0, off offset:-1 // GFX90A: scratch_load_ubyte_d16_hi a5, off, s2 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16_hi a5, off, s2 // GFX90A: scratch_load_ubyte_d16_hi a5, off, s2 ; encoding: [0x00,0x40,0x84,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16_hi a5, off, s2 // GFX90A: scratch_load_ubyte_d16_hi a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x84,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16_hi a5, off, s2 offset:4095 // GFX90A: scratch_load_ubyte_d16_hi a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x84,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16_hi a5, off, s2 offset:-4096 // GFX90A: scratch_load_ubyte_d16_hi a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x85,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16_hi a5, off, s2 offset:-1 glc // GFX90A: scratch_load_ubyte_d16_hi a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x86,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_ubyte_d16_hi a5, off, s2 offset:-1 slc // GFX90A: scratch_load_sbyte_d16 a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16 a5, off, s2 offset:-1 // GFX90A: scratch_load_sbyte_d16 a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0x82,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16 a255, off, s2 offset:-1 // GFX90A: scratch_load_sbyte_d16 a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0xe5,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16 a5, off, s101 offset:-1 // GFX90A: scratch_load_sbyte_d16 a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0xe6,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16 a5, off, flat_scratch_lo offset:-1 // GFX90A: scratch_load_sbyte_d16 a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0xe7,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16 a5, off, flat_scratch_hi offset:-1 // GFX90A: scratch_load_sbyte_d16 a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0xea,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16 a5, off, vcc_lo offset:-1 // GFX90A: scratch_load_sbyte_d16 a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0xeb,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16 a5, off, vcc_hi offset:-1 // GFX90A: scratch_load_sbyte_d16 a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x88,0xdc,0x00,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16 a5, v0, off offset:-1 // GFX90A: scratch_load_sbyte_d16 a5, off, s2 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16 a5, off, s2 // GFX90A: scratch_load_sbyte_d16 a5, off, s2 ; encoding: [0x00,0x40,0x88,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16 a5, off, s2 // GFX90A: scratch_load_sbyte_d16 a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x88,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16 a5, off, s2 offset:4095 // GFX90A: scratch_load_sbyte_d16 a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x88,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16 a5, off, s2 offset:-4096 // GFX90A: scratch_load_sbyte_d16 a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x89,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16 a5, off, s2 offset:-1 glc // GFX90A: scratch_load_sbyte_d16 a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8a,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16 a5, off, s2 offset:-1 slc // GFX90A: scratch_load_sbyte_d16_hi a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16_hi a5, off, s2 offset:-1 // GFX90A: scratch_load_sbyte_d16_hi a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0x82,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16_hi a255, off, s2 offset:-1 // GFX90A: scratch_load_sbyte_d16_hi a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0xe5,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16_hi a5, off, s101 offset:-1 // GFX90A: scratch_load_sbyte_d16_hi a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0xe6,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16_hi a5, off, flat_scratch_lo offset:-1 // GFX90A: scratch_load_sbyte_d16_hi a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0xe7,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16_hi a5, off, flat_scratch_hi offset:-1 // GFX90A: scratch_load_sbyte_d16_hi a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0xea,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16_hi a5, off, vcc_lo offset:-1 // GFX90A: scratch_load_sbyte_d16_hi a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0xeb,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16_hi a5, off, vcc_hi offset:-1 // GFX90A: scratch_load_sbyte_d16_hi a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x8c,0xdc,0x00,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16_hi a5, v0, off offset:-1 // GFX90A: scratch_load_sbyte_d16_hi a5, off, s2 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16_hi a5, off, s2 // GFX90A: scratch_load_sbyte_d16_hi a5, off, s2 ; encoding: [0x00,0x40,0x8c,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16_hi a5, off, s2 // GFX90A: scratch_load_sbyte_d16_hi a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x8c,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16_hi a5, off, s2 offset:4095 // GFX90A: scratch_load_sbyte_d16_hi a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x8c,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16_hi a5, off, s2 offset:-4096 // GFX90A: scratch_load_sbyte_d16_hi a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x8d,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16_hi a5, off, s2 offset:-1 glc // GFX90A: scratch_load_sbyte_d16_hi a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x8e,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_sbyte_d16_hi a5, off, s2 offset:-1 slc // GFX90A: scratch_load_short_d16 a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16 a5, off, s2 offset:-1 // GFX90A: scratch_load_short_d16 a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0x82,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16 a255, off, s2 offset:-1 // GFX90A: scratch_load_short_d16 a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0xe5,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16 a5, off, s101 offset:-1 // GFX90A: scratch_load_short_d16 a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0xe6,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16 a5, off, flat_scratch_lo offset:-1 // GFX90A: scratch_load_short_d16 a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0xe7,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16 a5, off, flat_scratch_hi offset:-1 // GFX90A: scratch_load_short_d16 a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0xea,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16 a5, off, vcc_lo offset:-1 // GFX90A: scratch_load_short_d16 a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0xeb,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16 a5, off, vcc_hi offset:-1 // GFX90A: scratch_load_short_d16 a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x90,0xdc,0x00,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16 a5, v0, off offset:-1 // GFX90A: scratch_load_short_d16 a5, off, s2 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16 a5, off, s2 // GFX90A: scratch_load_short_d16 a5, off, s2 ; encoding: [0x00,0x40,0x90,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16 a5, off, s2 // GFX90A: scratch_load_short_d16 a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x90,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16 a5, off, s2 offset:4095 // GFX90A: scratch_load_short_d16 a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x90,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16 a5, off, s2 offset:-4096 // GFX90A: scratch_load_short_d16 a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x91,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16 a5, off, s2 offset:-1 glc // GFX90A: scratch_load_short_d16 a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x92,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16 a5, off, s2 offset:-1 slc // GFX90A: scratch_load_short_d16_hi a5, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16_hi a5, off, s2 offset:-1 // GFX90A: scratch_load_short_d16_hi a255, off, s2 offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0x82,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16_hi a255, off, s2 offset:-1 // GFX90A: scratch_load_short_d16_hi a5, off, s101 offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0xe5,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16_hi a5, off, s101 offset:-1 // GFX90A: scratch_load_short_d16_hi a5, off, flat_scratch_lo offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0xe6,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16_hi a5, off, flat_scratch_lo offset:-1 // GFX90A: scratch_load_short_d16_hi a5, off, flat_scratch_hi offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0xe7,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16_hi a5, off, flat_scratch_hi offset:-1 // GFX90A: scratch_load_short_d16_hi a5, off, vcc_lo offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0xea,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16_hi a5, off, vcc_lo offset:-1 // GFX90A: scratch_load_short_d16_hi a5, off, vcc_hi offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0xeb,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16_hi a5, off, vcc_hi offset:-1 // GFX90A: scratch_load_short_d16_hi a5, v0, off offset:-1 ; encoding: [0xff,0x5f,0x94,0xdc,0x00,0x00,0xff,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16_hi a5, v0, off offset:-1 // GFX90A: scratch_load_short_d16_hi a5, off, s2 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16_hi a5, off, s2 // GFX90A: scratch_load_short_d16_hi a5, off, s2 ; encoding: [0x00,0x40,0x94,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16_hi a5, off, s2 // GFX90A: scratch_load_short_d16_hi a5, off, s2 offset:4095 ; encoding: [0xff,0x4f,0x94,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16_hi a5, off, s2 offset:4095 // GFX90A: scratch_load_short_d16_hi a5, off, s2 offset:-4096 ; encoding: [0x00,0x50,0x94,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16_hi a5, off, s2 offset:-4096 // GFX90A: scratch_load_short_d16_hi a5, off, s2 offset:-1 glc ; encoding: [0xff,0x5f,0x95,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16_hi a5, off, s2 offset:-1 glc // GFX90A: scratch_load_short_d16_hi a5, off, s2 offset:-1 slc ; encoding: [0xff,0x5f,0x96,0xdc,0x00,0x00,0x82,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction scratch_load_short_d16_hi a5, off, s2 offset:-1 slc // GFX90A: buffer_load_format_x a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_x a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_x a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_format_x a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_format_x a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_format_x a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_format_x a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_format_x a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_format_x a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_format_x a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe0,0x00,0x05,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_format_x a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x00,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_format_x a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x00,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_format_x a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, off, s[8:11], s3 // GFX90A: buffer_load_format_x a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, off, s[8:11], s3 // GFX90A: buffer_load_format_x a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x00,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_load_format_x a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x00,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_format_x a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x02,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_x a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_xy a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_xy a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_format_xy a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe0,0x00,0x06,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_format_xy a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x04,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_format_xy a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x04,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], off, s[8:11], s3 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], off, s[8:11], s3 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x04,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x04,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x06,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xy a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_xyz a[252:254], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0xfc,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[252:254], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_xyz a[6:8], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_format_xyz a[6:8], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe0,0x00,0x06,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_format_xyz a[6:8], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x08,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_format_xyz a[6:8], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x08,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], s3 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], off, s[8:11], s3 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], s3 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], off, s[8:11], s3 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x08,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:7 // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x08,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x0a,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyz a[6:8], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_xyzw a[252:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0xfc,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[252:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe0,0x00,0x06,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_format_xyzw a[6:9], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x0c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_format_xyzw a[6:9], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x0c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], s3 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], off, s[8:11], s3 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], s3 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], off, s[8:11], s3 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x0c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:7 // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x0c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x0e,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_xyzw a[6:9], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_store_format_x a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_x a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0xff,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a255, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_x a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_format_x a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_format_x a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_format_x a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_format_x a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_format_x a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_format_x a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_format_x a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe0,0x00,0x01,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_format_x a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x10,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_format_x a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x10,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_format_x a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, off, s[12:15], s4 // GFX90A: buffer_store_format_x a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, off, s[12:15], s4 // GFX90A: buffer_store_format_x a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x10,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, off, s[12:15], s4 offset:7 // GFX90A: buffer_store_format_x a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x10,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_format_x a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x12,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_x a1, off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_xy a[254:255], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0xfe,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[254:255], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_xy a[2:3], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_format_xy a[2:3], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe0,0x00,0x02,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_format_xy a[2:3], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x14,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_format_xy a[2:3], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x14,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], off, s[12:15], s4 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], off, s[12:15], s4 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x14,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:7 // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x14,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x16,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xy a[2:3], off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_xyz a[252:254], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0xfc,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[252:254], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_xyz a[2:4], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_format_xyz a[2:4], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe0,0x00,0x02,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_format_xyz a[2:4], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x18,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_format_xyz a[2:4], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x18,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], s4 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], off, s[12:15], s4 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], s4 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], off, s[12:15], s4 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x18,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:7 // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x18,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x1a,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyz a[2:4], off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_xyzw a[252:255], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0xfc,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[252:255], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe0,0x00,0x02,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_format_xyzw a[2:5], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x1c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_format_xyzw a[2:5], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x1c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], s4 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], off, s[12:15], s4 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], s4 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], off, s[12:15], s4 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x1c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:7 // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x1c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x1e,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_xyzw a[2:5], off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_d16_x a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_d16_x a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_format_d16_x a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0,0x00,0x05,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_format_d16_x a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x20,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_format_d16_x a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x20,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, off, s[8:11], s3 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, off, s[8:11], s3 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x20,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x20,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_format_d16_x a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x22,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_x a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_d16_xy a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_d16_xy a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_format_d16_xy a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe0,0x00,0x05,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_format_d16_xy a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x24,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_format_d16_xy a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x24,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, off, s[8:11], s3 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, off, s[8:11], s3 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x24,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x24,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x26,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xy a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_d16_xyz a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe0,0x00,0x06,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_format_d16_xyz a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x28,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_format_d16_xyz a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x28,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x28,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x28,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x2a,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyz a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_d16_xyzw a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe0,0x00,0x06,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_format_d16_xyzw a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x2c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_format_d16_xyzw a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x2c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x2c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x2c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x2e,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_format_d16_xyzw a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_d16_x a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0xff,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a255, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_d16_x a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_format_d16_x a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe0,0x00,0x01,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_format_d16_x a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x30,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_format_d16_x a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x30,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, off, s[12:15], s4 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, off, s[12:15], s4 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x30,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, off, s[12:15], s4 offset:7 // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x30,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_format_d16_x a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x32,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_x a1, off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_d16_xy a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0xff,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a255, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_d16_xy a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_format_d16_xy a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x34,0xe0,0x00,0x01,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_format_d16_xy a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x34,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_format_d16_xy a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x34,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, off, s[12:15], s4 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, off, s[12:15], s4 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x34,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:7 // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x34,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x36,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xy a1, off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_d16_xyz a[254:255], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0xfe,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[254:255], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x38,0xe0,0x00,0x02,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_format_d16_xyz a[2:3], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x38,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_format_d16_xyz a[2:3], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x38,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x38,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:7 // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x38,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x3a,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyz a[2:3], off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_d16_xyzw a[254:255], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0xfe,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[254:255], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x3c,0xe0,0x00,0x02,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_format_d16_xyzw a[2:3], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x3c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_format_d16_xyzw a[2:3], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x3c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x3c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x3c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x3c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:7 // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x3c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x3e,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_format_d16_xyzw a[2:3], off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_load_ubyte a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_ubyte a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_ubyte a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_ubyte a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_ubyte a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_ubyte a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_ubyte a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_ubyte a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_ubyte a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_ubyte a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x40,0xe0,0x00,0x05,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_ubyte a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x40,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_ubyte a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x40,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_ubyte a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, off, s[8:11], s3 // GFX90A: buffer_load_ubyte a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, off, s[8:11], s3 // GFX90A: buffer_load_ubyte a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x40,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_load_ubyte a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x40,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_ubyte a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x42,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_sbyte a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_sbyte a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_sbyte a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_sbyte a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_sbyte a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_sbyte a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_sbyte a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_sbyte a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_sbyte a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_sbyte a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x44,0xe0,0x00,0x05,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_sbyte a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x44,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_sbyte a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x44,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_sbyte a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, off, s[8:11], s3 // GFX90A: buffer_load_sbyte a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, off, s[8:11], s3 // GFX90A: buffer_load_sbyte a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x44,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_load_sbyte a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x44,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_sbyte a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x46,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_ushort a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_ushort a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_ushort a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_ushort a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_ushort a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_ushort a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_ushort a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_ushort a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_ushort a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_ushort a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x48,0xe0,0x00,0x05,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_ushort a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x48,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_ushort a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x48,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_ushort a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, off, s[8:11], s3 // GFX90A: buffer_load_ushort a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, off, s[8:11], s3 // GFX90A: buffer_load_ushort a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x48,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_load_ushort a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x48,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_ushort a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x4a,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ushort a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_sshort a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_sshort a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_sshort a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_sshort a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_sshort a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_sshort a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_sshort a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_sshort a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_sshort a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_sshort a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x4c,0xe0,0x00,0x05,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_sshort a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x4c,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_sshort a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x4c,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_sshort a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x4c,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, off, s[8:11], s3 // GFX90A: buffer_load_sshort a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x4c,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, off, s[8:11], s3 // GFX90A: buffer_load_sshort a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x4c,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_load_sshort a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x4c,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_sshort a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x4e,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sshort a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_dword a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_dword a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_dword a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_dword a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_dword a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_dword a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_dword a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_dword a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_dword a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_dword a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x50,0xe0,0x00,0x05,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_dword a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x50,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_dword a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x50,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_dword a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, off, s[8:11], s3 // GFX90A: buffer_load_dword a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, off, s[8:11], s3 // GFX90A: buffer_load_dword a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x50,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_load_dword a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x50,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_dword a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x52,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dword a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_dwordx2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x54,0xe0,0x00,0x06,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_dwordx2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x54,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_dwordx2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x54,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x54,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x54,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x56,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx2 a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_dwordx3 a[252:254], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0xfc,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[252:254], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x58,0xe0,0x00,0x06,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_dwordx3 a[6:8], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x58,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_dwordx3 a[6:8], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x58,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], s3 ; encoding: [0x00,0x00,0x58,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], off, s[8:11], s3 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], s3 ; encoding: [0x00,0x00,0x58,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], off, s[8:11], s3 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x58,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:7 // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x58,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x5a,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx3 a[6:8], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_dwordx4 a[252:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0xfc,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[252:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x5c,0xe0,0x00,0x06,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_dwordx4 a[6:9], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x5c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_dwordx4 a[6:9], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x5c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], s3 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], off, s[8:11], s3 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], s3 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], off, s[8:11], s3 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x5c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:7 // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x5c,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x5e,0xe0,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_dwordx4 a[6:9], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_store_byte a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_byte a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0xff,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a255, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_byte a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_byte a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_byte a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_byte a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_byte a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_byte a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_byte a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_byte a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0,0x00,0x01,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_byte a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x60,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_byte a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x60,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_byte a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, off, s[12:15], s4 // GFX90A: buffer_store_byte a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, off, s[12:15], s4 // GFX90A: buffer_store_byte a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x60,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, off, s[12:15], s4 offset:7 // GFX90A: buffer_store_byte a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x60,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_byte a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x62,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte a1, off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_byte_d16_hi a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0xff,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a255, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_byte_d16_hi a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_byte_d16_hi a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x64,0xe0,0x00,0x01,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_byte_d16_hi a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x64,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_byte_d16_hi a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x64,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, off, s[12:15], s4 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, off, s[12:15], s4 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x64,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:7 // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x64,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x66,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_byte_d16_hi a1, off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_store_short a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_short a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0xff,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a255, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_short a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_short a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_short a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_short a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_short a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_short a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_short a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_short a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x68,0xe0,0x00,0x01,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_short a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x68,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_short a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x68,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_short a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, off, s[12:15], s4 // GFX90A: buffer_store_short a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, off, s[12:15], s4 // GFX90A: buffer_store_short a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x68,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, off, s[12:15], s4 offset:7 // GFX90A: buffer_store_short a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x68,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_short a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x6a,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short a1, off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_short_d16_hi a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0xff,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a255, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_short_d16_hi a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_short_d16_hi a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x6c,0xe0,0x00,0x01,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_short_d16_hi a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x6c,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_short_d16_hi a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x6c,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x6c,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, off, s[12:15], s4 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x6c,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, off, s[12:15], s4 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x6c,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:7 // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x6c,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x6e,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_short_d16_hi a1, off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_store_dword a1, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_dword a255, off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0xff,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a255, off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_dword a1, off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_dword a1, off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_dword a1, off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_dword a1, off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_dword a1, off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_dword a1, off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_dword a1, off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_dword a1, off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x70,0xe0,0x00,0x01,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_dword a1, v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x70,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_dword a1, v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x70,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_dword a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, off, s[12:15], s4 // GFX90A: buffer_store_dword a1, off, s[12:15], s4 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, off, s[12:15], s4 // GFX90A: buffer_store_dword a1, off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x70,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, off, s[12:15], s4 offset:7 // GFX90A: buffer_store_dword a1, off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x70,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_dword a1, off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x72,0xe0,0x00,0x01,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dword a1, off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_dwordx2 a[254:255], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0xfe,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[254:255], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x74,0xe0,0x00,0x02,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_dwordx2 a[2:3], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x74,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_dwordx2 a[2:3], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x74,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], off, s[12:15], s4 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], s4 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], off, s[12:15], s4 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x74,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:7 // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x74,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x76,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx2 a[2:3], off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_dwordx3 a[252:254], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0xfc,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[252:254], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x78,0xe0,0x00,0x02,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_dwordx3 a[2:4], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x78,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_dwordx3 a[2:4], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x78,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], s4 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], off, s[12:15], s4 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], s4 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], off, s[12:15], s4 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x78,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:7 // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x78,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x7a,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx3 a[2:4], off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_dwordx4 a[252:255], off, s[12:15], s4 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0xfc,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[252:255], off, s[12:15], s4 offset:4095 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[16:19], s4 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x84,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], off, s[16:19], s4 offset:4095 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[96:99], s4 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x98,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], off, s[96:99], s4 offset:4095 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], s101 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x83,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], off, s[12:15], s101 offset:4095 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], m0 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x83,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], off, s[12:15], m0 offset:4095 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], 0 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x83,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], off, s[12:15], 0 offset:4095 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], -1 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x83,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], off, s[12:15], -1 offset:4095 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x83,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], off, s[12:15], 0.5 offset:4095 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x7c,0xe0,0x00,0x02,0x83,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], off, s[12:15], -4.0 offset:4095 // GFX90A: buffer_store_dwordx4 a[2:5], v0, s[12:15], s4 idxen offset:4095 ; encoding: [0xff,0x2f,0x7c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], v0, s[12:15], s4 idxen offset:4095 // GFX90A: buffer_store_dwordx4 a[2:5], v0, s[12:15], s4 offen offset:4095 ; encoding: [0xff,0x1f,0x7c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], v0, s[12:15], s4 offen offset:4095 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], s4 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], off, s[12:15], s4 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], s4 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], off, s[12:15], s4 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:7 ; encoding: [0x07,0x00,0x7c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:7 // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:4095 glc ; encoding: [0xff,0x4f,0x7c,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:4095 glc // GFX90A: buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:4095 slc ; encoding: [0xff,0x0f,0x7e,0xe0,0x00,0x02,0x83,0x04] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_store_dwordx4 a[2:5], off, s[12:15], s4 offset:4095 slc // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_ubyte_d16 a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_ubyte_d16 a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_ubyte_d16 a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe0,0x00,0x05,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_ubyte_d16 a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x80,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_ubyte_d16 a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x80,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x80,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, off, s[8:11], s3 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x80,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, off, s[8:11], s3 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x80,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x80,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x82,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16 a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_ubyte_d16_hi a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe0,0x00,0x05,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_ubyte_d16_hi a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x84,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_ubyte_d16_hi a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x84,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x84,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x84,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x84,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x84,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x86,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_ubyte_d16_hi a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_sbyte_d16 a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_sbyte_d16 a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_sbyte_d16 a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe0,0x00,0x05,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_sbyte_d16 a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x88,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_sbyte_d16 a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x88,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x88,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, off, s[8:11], s3 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x88,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, off, s[8:11], s3 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x88,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x88,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x8a,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16 a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_sbyte_d16_hi a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe0,0x00,0x05,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_sbyte_d16_hi a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x8c,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_sbyte_d16_hi a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x8c,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x8c,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x8c,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x8c,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x8c,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x8e,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_sbyte_d16_hi a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_short_d16 a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_short_d16 a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_short_d16 a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_short_d16 a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe0,0x00,0x05,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_short_d16 a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x90,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_short_d16 a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x90,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x90,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, off, s[8:11], s3 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x90,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, off, s[8:11], s3 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x90,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_load_short_d16 a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x90,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_short_d16 a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x92,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16 a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_short_d16_hi a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_load_short_d16_hi a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_load_short_d16_hi a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], 0.5 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x82,0xf0] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, off, s[8:11], 0.5 offset:4095 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], -4.0 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe0,0x00,0x05,0x82,0xf7] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, off, s[8:11], -4.0 offset:4095 // GFX90A: buffer_load_short_d16_hi a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x94,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_load_short_d16_hi a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x94,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x94,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, off, s[8:11], s3 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x94,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, off, s[8:11], s3 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x94,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x94,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x96,0xe0,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_load_short_d16_hi a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_swap a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_swap a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_swap a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_swap a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_swap a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_swap a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_swap a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_swap a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_swap a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x00,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_swap a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x00,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_swap a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x00,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a5, off, s[8:11], s3 // GFX90A: buffer_atomic_swap a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x00,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a5, off, s[8:11], s3 // GFX90A: buffer_atomic_swap a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x00,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_swap a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x00,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_swap a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x02,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_cmpswap a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x04,0xe1,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_cmpswap a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x04,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_cmpswap a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x04,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x04,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x04,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x04,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x04,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x06,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_add a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_add a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_add a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_add a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_add a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_add a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_add a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_add a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_add a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x08,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_add a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x08,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_add a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x08,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a5, off, s[8:11], s3 // GFX90A: buffer_atomic_add a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x08,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a5, off, s[8:11], s3 // GFX90A: buffer_atomic_add a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x08,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_add a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x08,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_add a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x0a,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_sub a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_sub a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_sub a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_sub a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_sub a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_sub a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_sub a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_sub a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x0c,0xe1,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_sub a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x0c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_sub a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x0c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_sub a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x0c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a5, off, s[8:11], s3 // GFX90A: buffer_atomic_sub a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x0c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a5, off, s[8:11], s3 // GFX90A: buffer_atomic_sub a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x0c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_sub a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x0c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_sub a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x0e,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_smin a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_smin a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_smin a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_smin a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_smin a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_smin a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_smin a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_smin a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x10,0xe1,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_smin a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x10,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_smin a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x10,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_smin a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x10,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a5, off, s[8:11], s3 // GFX90A: buffer_atomic_smin a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x10,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a5, off, s[8:11], s3 // GFX90A: buffer_atomic_smin a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x10,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_smin a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x10,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_smin a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x12,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_umin a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_umin a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_umin a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_umin a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_umin a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_umin a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_umin a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_umin a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x14,0xe1,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_umin a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x14,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_umin a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x14,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_umin a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x14,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a5, off, s[8:11], s3 // GFX90A: buffer_atomic_umin a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x14,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a5, off, s[8:11], s3 // GFX90A: buffer_atomic_umin a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x14,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_umin a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x14,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_umin a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x16,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_smax a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_smax a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_smax a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_smax a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_smax a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_smax a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_smax a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_smax a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x18,0xe1,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_smax a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x18,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_smax a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x18,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_smax a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x18,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a5, off, s[8:11], s3 // GFX90A: buffer_atomic_smax a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x18,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a5, off, s[8:11], s3 // GFX90A: buffer_atomic_smax a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x18,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_smax a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x18,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_smax a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x1a,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_umax a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_umax a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_umax a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_umax a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_umax a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_umax a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_umax a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_umax a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x1c,0xe1,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_umax a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x1c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_umax a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x1c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_umax a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x1c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a5, off, s[8:11], s3 // GFX90A: buffer_atomic_umax a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x1c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a5, off, s[8:11], s3 // GFX90A: buffer_atomic_umax a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x1c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_umax a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x1c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_umax a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x1e,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_and a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_and a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_and a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_and a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_and a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_and a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_and a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_and a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe1,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_and a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x20,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_and a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x20,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_and a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x20,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a5, off, s[8:11], s3 // GFX90A: buffer_atomic_and a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x20,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a5, off, s[8:11], s3 // GFX90A: buffer_atomic_and a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x20,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_and a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x20,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_and a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x22,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_or a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_or a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_or a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_or a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_or a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_or a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_or a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_or a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x24,0xe1,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_or a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x24,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_or a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x24,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_or a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x24,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a5, off, s[8:11], s3 // GFX90A: buffer_atomic_or a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x24,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a5, off, s[8:11], s3 // GFX90A: buffer_atomic_or a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x24,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_or a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x24,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_or a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x26,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_xor a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_xor a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_xor a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_xor a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_xor a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_xor a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_xor a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_xor a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x28,0xe1,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_xor a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x28,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_xor a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x28,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_xor a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x28,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a5, off, s[8:11], s3 // GFX90A: buffer_atomic_xor a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x28,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a5, off, s[8:11], s3 // GFX90A: buffer_atomic_xor a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x28,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_xor a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x28,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_xor a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x2a,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_inc a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_inc a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_inc a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_inc a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_inc a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_inc a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_inc a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_inc a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x2c,0xe1,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_inc a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x2c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_inc a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x2c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_inc a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x2c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a5, off, s[8:11], s3 // GFX90A: buffer_atomic_inc a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x2c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a5, off, s[8:11], s3 // GFX90A: buffer_atomic_inc a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x2c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_inc a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x2c,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_inc a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x2e,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_dec a5, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a5, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_dec a255, off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0xff,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a255, off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_dec a5, off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0x05,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a5, off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_dec a5, off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0x05,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a5, off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_dec a5, off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0x05,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a5, off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_dec a5, off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0x05,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a5, off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_dec a5, off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0x05,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a5, off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_dec a5, off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x30,0xe1,0x00,0x05,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a5, off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_dec a5, v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x30,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a5, v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_dec a5, v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x30,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a5, v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_dec a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x30,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a5, off, s[8:11], s3 // GFX90A: buffer_atomic_dec a5, off, s[8:11], s3 ; encoding: [0x00,0x00,0x30,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a5, off, s[8:11], s3 // GFX90A: buffer_atomic_dec a5, off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x30,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a5, off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_dec a5, off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x30,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a5, off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_dec a5, off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x32,0xe1,0x00,0x05,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec a5, off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_swap_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x80,0xe1,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_swap_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x80,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_swap_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x80,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x80,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x80,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x80,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x80,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x82,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_swap_x2 a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_cmpswap_x2 a[252:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0xfc,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[252:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[6:9], off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[6:9], off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x84,0xe1,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x84,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[6:9], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x84,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[6:9], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 ; encoding: [0x00,0x00,0x84,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 ; encoding: [0x00,0x00,0x84,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x84,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x84,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x86,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_cmpswap_x2 a[6:9], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_add_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x88,0xe1,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_add_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x88,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_add_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x88,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x88,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x88,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x88,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x88,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x8a,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_add_x2 a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_sub_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x8c,0xe1,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_sub_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x8c,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_sub_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x8c,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x8c,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x8c,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x8c,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x8c,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x8e,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_sub_x2 a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_smin_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x90,0xe1,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_smin_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x90,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_smin_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x90,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x90,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x90,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x90,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x90,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x92,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smin_x2 a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_umin_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x94,0xe1,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_umin_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x94,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_umin_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x94,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x94,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x94,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x94,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x94,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x96,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umin_x2 a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_smax_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x98,0xe1,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_smax_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x98,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_smax_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x98,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x98,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x98,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x98,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x98,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x9a,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_smax_x2 a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_umax_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0x9c,0xe1,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_umax_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0x9c,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_umax_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0x9c,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x9c,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0x9c,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0x9c,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0x9c,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0x9e,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_umax_x2 a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_and_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0xa0,0xe1,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_and_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0xa0,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_and_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0xa0,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xa0,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xa0,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0xa0,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0xa0,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0xa2,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_and_x2 a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_or_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0xa4,0xe1,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_or_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0xa4,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_or_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0xa4,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xa4,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xa4,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0xa4,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0xa4,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0xa6,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_or_x2 a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_xor_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0xa8,0xe1,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_xor_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0xa8,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_xor_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0xa8,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xa8,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xa8,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0xa8,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0xa8,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0xaa,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_xor_x2 a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_inc_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0xac,0xe1,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_inc_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0xac,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_inc_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0xac,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xac,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xac,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0xac,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0xac,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0xae,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_inc_x2 a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_dec_x2 a[254:255], off, s[8:11], s3 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0xfe,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[254:255], off, s[8:11], s3 offset:4095 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[12:15], s3 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0x06,0x83,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[6:7], off, s[12:15], s3 offset:4095 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[96:99], s3 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0x06,0x98,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[6:7], off, s[96:99], s3 offset:4095 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], s101 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0x06,0x82,0x65] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[6:7], off, s[8:11], s101 offset:4095 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], m0 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0x06,0x82,0x7c] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[6:7], off, s[8:11], m0 offset:4095 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], 0 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0x06,0x82,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[6:7], off, s[8:11], 0 offset:4095 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], -1 offset:4095 ; encoding: [0xff,0x0f,0xb0,0xe1,0x00,0x06,0x82,0xc1] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[6:7], off, s[8:11], -1 offset:4095 // GFX90A: buffer_atomic_dec_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 ; encoding: [0xff,0x2f,0xb0,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[6:7], v0, s[8:11], s3 idxen offset:4095 // GFX90A: buffer_atomic_dec_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 ; encoding: [0xff,0x1f,0xb0,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[6:7], v0, s[8:11], s3 offen offset:4095 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xb0,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 ; encoding: [0x00,0x00,0xb0,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:7 ; encoding: [0x07,0x00,0xb0,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:7 // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:4095 glc ; encoding: [0xff,0x4f,0xb0,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:4095 glc // GFX90A: buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:4095 slc ; encoding: [0xff,0x0f,0xb2,0xe1,0x00,0x06,0x82,0x03] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction buffer_atomic_dec_x2 a[6:7], off, s[8:11], s3 offset:4095 slc // GFX90A: tbuffer_load_format_x a1, off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x00,0x78,0xe9,0x00,0x01,0x81,0x01] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction tbuffer_load_format_x a1, off, s[4:7], dfmt:15, nfmt:2, s1 // GFX90A: tbuffer_load_format_xy a[2:3], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x78,0xe9,0x00,0x02,0x81,0x01] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction tbuffer_load_format_xy a[2:3], off, s[4:7], dfmt:15, nfmt:2, s1 // GFX90A: tbuffer_load_format_xyz a[2:4], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x00,0x79,0xe9,0x00,0x02,0x81,0x01] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction tbuffer_load_format_xyz a[2:4], off, s[4:7], dfmt:15, nfmt:2, s1 // GFX90A: tbuffer_load_format_xyzw a[2:5], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x79,0xe9,0x00,0x02,0x81,0x01] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction tbuffer_load_format_xyzw a[2:5], off, s[4:7], dfmt:15, nfmt:2, s1 // GFX90A: tbuffer_store_format_x a1, off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x00,0x7a,0xe9,0x00,0x01,0x81,0x01] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction tbuffer_store_format_x a1, off, s[4:7], dfmt:15, nfmt:2, s1 // GFX90A: tbuffer_store_format_xy a[2:3], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7a,0xe9,0x00,0x02,0x81,0x01] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction tbuffer_store_format_xy a[2:3], off, s[4:7], dfmt:15, nfmt:2, s1 // GFX90A: tbuffer_store_format_xyzw a[2:5], off, s[4:7], s1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x02,0x81,0x01] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction tbuffer_store_format_xyzw a[2:5], off, s[4:7], dfmt:15, nfmt:2, s1 // GFX90A: tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7], ttmp1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x02,0x9c,0x6d] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7], dfmt:15, nfmt:2, ttmp1 // GFX90A: tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7], ttmp1 format:[BUF_DATA_FORMAT_RESERVED_15] ; encoding: [0x00,0x80,0x7b,0xe8,0x00,0x02,0x9c,0x6d] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7], dfmt:15, nfmt:0, ttmp1 // GFX90A: tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7], ttmp1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x03,0xe9,0x00,0x02,0x9c,0x6d] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7], dfmt:0, nfmt:2, ttmp1 // GFX90A: tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7], ttmp1 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] ; encoding: [0x00,0x80,0x7b,0xe9,0x00,0x02,0x9c,0x6d] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction tbuffer_store_format_xyzw a[2:5], off, ttmp[4:7], dfmt:15, nfmt:2, ttmp1 // GFX90A: ds_add_u32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x00,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_u32 v1, a2 offset:65535 // GFX90A: ds_add_u32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x00,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_u32 v255, a2 offset:65535 // GFX90A: ds_add_u32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x00,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_u32 v1, a255 offset:65535 // GFX90A: ds_add_u32 v1, a2 ; encoding: [0x00,0x00,0x00,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_u32 v1, a2 // GFX90A: ds_add_u32 v1, a2 ; encoding: [0x00,0x00,0x00,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_u32 v1, a2 // GFX90A: ds_add_u32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x00,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_u32 v1, a2 offset:4 // GFX90A: ds_sub_u32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x02,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_sub_u32 v1, a2 offset:65535 // GFX90A: ds_sub_u32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x02,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_sub_u32 v255, a2 offset:65535 // GFX90A: ds_sub_u32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x02,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_sub_u32 v1, a255 offset:65535 // GFX90A: ds_sub_u32 v1, a2 ; encoding: [0x00,0x00,0x02,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_sub_u32 v1, a2 // GFX90A: ds_sub_u32 v1, a2 ; encoding: [0x00,0x00,0x02,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_sub_u32 v1, a2 // GFX90A: ds_sub_u32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x02,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_sub_u32 v1, a2 offset:4 // GFX90A: ds_rsub_u32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x04,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_rsub_u32 v1, a2 offset:65535 // GFX90A: ds_rsub_u32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x04,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_rsub_u32 v255, a2 offset:65535 // GFX90A: ds_rsub_u32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x04,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_rsub_u32 v1, a255 offset:65535 // GFX90A: ds_rsub_u32 v1, a2 ; encoding: [0x00,0x00,0x04,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_rsub_u32 v1, a2 // GFX90A: ds_rsub_u32 v1, a2 ; encoding: [0x00,0x00,0x04,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_rsub_u32 v1, a2 // GFX90A: ds_rsub_u32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x04,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_rsub_u32 v1, a2 offset:4 // GFX90A: ds_inc_u32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x06,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_inc_u32 v1, a2 offset:65535 // GFX90A: ds_inc_u32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x06,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_inc_u32 v255, a2 offset:65535 // GFX90A: ds_inc_u32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x06,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_inc_u32 v1, a255 offset:65535 // GFX90A: ds_inc_u32 v1, a2 ; encoding: [0x00,0x00,0x06,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_inc_u32 v1, a2 // GFX90A: ds_inc_u32 v1, a2 ; encoding: [0x00,0x00,0x06,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_inc_u32 v1, a2 // GFX90A: ds_inc_u32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x06,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_inc_u32 v1, a2 offset:4 // GFX90A: ds_dec_u32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x08,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_dec_u32 v1, a2 offset:65535 // GFX90A: ds_dec_u32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x08,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_dec_u32 v255, a2 offset:65535 // GFX90A: ds_dec_u32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x08,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_dec_u32 v1, a255 offset:65535 // GFX90A: ds_dec_u32 v1, a2 ; encoding: [0x00,0x00,0x08,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_dec_u32 v1, a2 // GFX90A: ds_dec_u32 v1, a2 ; encoding: [0x00,0x00,0x08,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_dec_u32 v1, a2 // GFX90A: ds_dec_u32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x08,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_dec_u32 v1, a2 offset:4 // GFX90A: ds_min_i32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x0a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_i32 v1, a2 offset:65535 // GFX90A: ds_min_i32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x0a,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_i32 v255, a2 offset:65535 // GFX90A: ds_min_i32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x0a,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_i32 v1, a255 offset:65535 // GFX90A: ds_min_i32 v1, a2 ; encoding: [0x00,0x00,0x0a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_i32 v1, a2 // GFX90A: ds_min_i32 v1, a2 ; encoding: [0x00,0x00,0x0a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_i32 v1, a2 // GFX90A: ds_min_i32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x0a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_i32 v1, a2 offset:4 // GFX90A: ds_max_i32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x0c,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_i32 v1, a2 offset:65535 // GFX90A: ds_max_i32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x0c,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_i32 v255, a2 offset:65535 // GFX90A: ds_max_i32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x0c,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_i32 v1, a255 offset:65535 // GFX90A: ds_max_i32 v1, a2 ; encoding: [0x00,0x00,0x0c,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_i32 v1, a2 // GFX90A: ds_max_i32 v1, a2 ; encoding: [0x00,0x00,0x0c,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_i32 v1, a2 // GFX90A: ds_max_i32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x0c,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_i32 v1, a2 offset:4 // GFX90A: ds_min_u32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x0e,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_u32 v1, a2 offset:65535 // GFX90A: ds_min_u32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x0e,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_u32 v255, a2 offset:65535 // GFX90A: ds_min_u32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x0e,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_u32 v1, a255 offset:65535 // GFX90A: ds_min_u32 v1, a2 ; encoding: [0x00,0x00,0x0e,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_u32 v1, a2 // GFX90A: ds_min_u32 v1, a2 ; encoding: [0x00,0x00,0x0e,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_u32 v1, a2 // GFX90A: ds_min_u32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x0e,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_u32 v1, a2 offset:4 // GFX90A: ds_max_u32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x10,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_u32 v1, a2 offset:65535 // GFX90A: ds_max_u32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x10,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_u32 v255, a2 offset:65535 // GFX90A: ds_max_u32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x10,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_u32 v1, a255 offset:65535 // GFX90A: ds_max_u32 v1, a2 ; encoding: [0x00,0x00,0x10,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_u32 v1, a2 // GFX90A: ds_max_u32 v1, a2 ; encoding: [0x00,0x00,0x10,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_u32 v1, a2 // GFX90A: ds_max_u32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x10,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_u32 v1, a2 offset:4 // GFX90A: ds_and_b32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x12,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_and_b32 v1, a2 offset:65535 // GFX90A: ds_and_b32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x12,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_and_b32 v255, a2 offset:65535 // GFX90A: ds_and_b32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x12,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_and_b32 v1, a255 offset:65535 // GFX90A: ds_and_b32 v1, a2 ; encoding: [0x00,0x00,0x12,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_and_b32 v1, a2 // GFX90A: ds_and_b32 v1, a2 ; encoding: [0x00,0x00,0x12,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_and_b32 v1, a2 // GFX90A: ds_and_b32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x12,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_and_b32 v1, a2 offset:4 // GFX90A: ds_or_b32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x14,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_or_b32 v1, a2 offset:65535 // GFX90A: ds_or_b32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x14,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_or_b32 v255, a2 offset:65535 // GFX90A: ds_or_b32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x14,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_or_b32 v1, a255 offset:65535 // GFX90A: ds_or_b32 v1, a2 ; encoding: [0x00,0x00,0x14,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_or_b32 v1, a2 // GFX90A: ds_or_b32 v1, a2 ; encoding: [0x00,0x00,0x14,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_or_b32 v1, a2 // GFX90A: ds_or_b32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x14,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_or_b32 v1, a2 offset:4 // GFX90A: ds_xor_b32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x16,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_xor_b32 v1, a2 offset:65535 // GFX90A: ds_xor_b32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x16,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_xor_b32 v255, a2 offset:65535 // GFX90A: ds_xor_b32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x16,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_xor_b32 v1, a255 offset:65535 // GFX90A: ds_xor_b32 v1, a2 ; encoding: [0x00,0x00,0x16,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_xor_b32 v1, a2 // GFX90A: ds_xor_b32 v1, a2 ; encoding: [0x00,0x00,0x16,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_xor_b32 v1, a2 // GFX90A: ds_xor_b32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x16,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_xor_b32 v1, a2 offset:4 // GFX90A: ds_mskor_b32 v1, a2, a3 offset:65535 ; encoding: [0xff,0xff,0x18,0xda,0x01,0x02,0x03,0x00] @@ -7122,27 +7122,27 @@ ds_mskor_b32 v1, a2, a3 ds_mskor_b32 v1, a2, a3 offset:4 // GFX90A: ds_write_b32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x1a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b32 v1, a2 offset:65535 // GFX90A: ds_write_b32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x1a,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b32 v255, a2 offset:65535 // GFX90A: ds_write_b32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x1a,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b32 v1, a255 offset:65535 // GFX90A: ds_write_b32 v1, a2 ; encoding: [0x00,0x00,0x1a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b32 v1, a2 // GFX90A: ds_write_b32 v1, a2 ; encoding: [0x00,0x00,0x1a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b32 v1, a2 // GFX90A: ds_write_b32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x1a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b32 v1, a2 offset:4 // GFX90A: ds_write2_b32 v1, a2, a3 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x1c,0xda,0x01,0x02,0x03,0x00] @@ -7282,123 +7282,123 @@ ds_cmpst_f32 v1, a2, a3 ds_cmpst_f32 v1, a2, a3 offset:4 // GFX90A: ds_min_f32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x24,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_f32 v1, a2 offset:65535 // GFX90A: ds_min_f32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x24,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_f32 v255, a2 offset:65535 // GFX90A: ds_min_f32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x24,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_f32 v1, a255 offset:65535 // GFX90A: ds_min_f32 v1, a2 ; encoding: [0x00,0x00,0x24,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_f32 v1, a2 // GFX90A: ds_min_f32 v1, a2 ; encoding: [0x00,0x00,0x24,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_f32 v1, a2 // GFX90A: ds_min_f32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x24,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_f32 v1, a2 offset:4 // GFX90A: ds_max_f32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x26,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_f32 v1, a2 offset:65535 // GFX90A: ds_max_f32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x26,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_f32 v255, a2 offset:65535 // GFX90A: ds_max_f32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x26,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_f32 v1, a255 offset:65535 // GFX90A: ds_max_f32 v1, a2 ; encoding: [0x00,0x00,0x26,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_f32 v1, a2 // GFX90A: ds_max_f32 v1, a2 ; encoding: [0x00,0x00,0x26,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_f32 v1, a2 // GFX90A: ds_max_f32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x26,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_f32 v1, a2 offset:4 // GFX90A: ds_add_f32 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x2a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_f32 v1, a2 offset:65535 // GFX90A: ds_add_f32 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x2a,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_f32 v255, a2 offset:65535 // GFX90A: ds_add_f32 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x2a,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_f32 v1, a255 offset:65535 // GFX90A: ds_add_f32 v1, a2 ; encoding: [0x00,0x00,0x2a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_f32 v1, a2 // GFX90A: ds_add_f32 v1, a2 ; encoding: [0x00,0x00,0x2a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_f32 v1, a2 // GFX90A: ds_add_f32 v1, a2 offset:4 ; encoding: [0x04,0x00,0x2a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_f32 v1, a2 offset:4 // GFX90A: ds_write_b8 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x3c,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b8 v1, a2 offset:65535 // GFX90A: ds_write_b8 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x3c,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b8 v255, a2 offset:65535 // GFX90A: ds_write_b8 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x3c,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b8 v1, a255 offset:65535 // GFX90A: ds_write_b8 v1, a2 ; encoding: [0x00,0x00,0x3c,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b8 v1, a2 // GFX90A: ds_write_b8 v1, a2 ; encoding: [0x00,0x00,0x3c,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b8 v1, a2 // GFX90A: ds_write_b8 v1, a2 offset:4 ; encoding: [0x04,0x00,0x3c,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b8 v1, a2 offset:4 // GFX90A: ds_write_b16 v1, a2 offset:65535 ; encoding: [0xff,0xff,0x3e,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b16 v1, a2 offset:65535 // GFX90A: ds_write_b16 v255, a2 offset:65535 ; encoding: [0xff,0xff,0x3e,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b16 v255, a2 offset:65535 // GFX90A: ds_write_b16 v1, a255 offset:65535 ; encoding: [0xff,0xff,0x3e,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b16 v1, a255 offset:65535 // GFX90A: ds_write_b16 v1, a2 ; encoding: [0x00,0x00,0x3e,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b16 v1, a2 // GFX90A: ds_write_b16 v1, a2 ; encoding: [0x00,0x00,0x3e,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b16 v1, a2 // GFX90A: ds_write_b16 v1, a2 offset:4 ; encoding: [0x04,0x00,0x3e,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b16 v1, a2 offset:4 // GFX90A: ds_add_rtn_u32 a5, v1, a2 offset:65535 ; encoding: [0xff,0xff,0x40,0xda,0x01,0x02,0x00,0x05] @@ -8066,219 +8066,219 @@ ds_add_rtn_f32 a5, v1, a2 ds_add_rtn_f32 a5, v1, a2 offset:4 // GFX90A: ds_read_b32 a5, v1 offset:65535 ; encoding: [0xff,0xff,0x6c,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b32 a5, v1 offset:65535 // GFX90A: ds_read_b32 a255, v1 offset:65535 ; encoding: [0xff,0xff,0x6c,0xda,0x01,0x00,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b32 a255, v1 offset:65535 // GFX90A: ds_read_b32 a5, v255 offset:65535 ; encoding: [0xff,0xff,0x6c,0xda,0xff,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b32 a5, v255 offset:65535 // GFX90A: ds_read_b32 a5, v1 ; encoding: [0x00,0x00,0x6c,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b32 a5, v1 // GFX90A: ds_read_b32 a5, v1 ; encoding: [0x00,0x00,0x6c,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b32 a5, v1 // GFX90A: ds_read_b32 a5, v1 offset:4 ; encoding: [0x04,0x00,0x6c,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b32 a5, v1 offset:4 // GFX90A: ds_read2_b32 a[6:7], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x6e,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b32 a[6:7], v1 offset0:127 offset1:255 // GFX90A: ds_read2_b32 a[254:255], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x6e,0xda,0x01,0x00,0x00,0xfe] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b32 a[254:255], v1 offset0:127 offset1:255 // GFX90A: ds_read2_b32 a[6:7], v255 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x6e,0xda,0xff,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b32 a[6:7], v255 offset0:127 offset1:255 // GFX90A: ds_read2_b32 a[6:7], v1 offset1:255 ; encoding: [0x00,0xff,0x6e,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b32 a[6:7], v1 offset1:255 // GFX90A: ds_read2_b32 a[6:7], v1 offset1:255 ; encoding: [0x00,0xff,0x6e,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b32 a[6:7], v1 offset1:255 // GFX90A: ds_read2_b32 a[6:7], v1 offset0:16 offset1:255 ; encoding: [0x10,0xff,0x6e,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b32 a[6:7], v1 offset0:16 offset1:255 // GFX90A: ds_read2_b32 a[6:7], v1 offset0:127 ; encoding: [0x7f,0x00,0x6e,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b32 a[6:7], v1 offset0:127 // GFX90A: ds_read2_b32 a[6:7], v1 offset0:127 ; encoding: [0x7f,0x00,0x6e,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b32 a[6:7], v1 offset0:127 // GFX90A: ds_read2_b32 a[6:7], v1 offset0:127 offset1:1 ; encoding: [0x7f,0x01,0x6e,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b32 a[6:7], v1 offset0:127 offset1:1 // GFX90A: ds_read2st64_b32 a[6:7], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x70,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b32 a[6:7], v1 offset0:127 offset1:255 // GFX90A: ds_read2st64_b32 a[254:255], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x70,0xda,0x01,0x00,0x00,0xfe] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b32 a[254:255], v1 offset0:127 offset1:255 // GFX90A: ds_read2st64_b32 a[6:7], v255 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x70,0xda,0xff,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b32 a[6:7], v255 offset0:127 offset1:255 // GFX90A: ds_read2st64_b32 a[6:7], v1 offset1:255 ; encoding: [0x00,0xff,0x70,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b32 a[6:7], v1 offset1:255 // GFX90A: ds_read2st64_b32 a[6:7], v1 offset1:255 ; encoding: [0x00,0xff,0x70,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b32 a[6:7], v1 offset1:255 // GFX90A: ds_read2st64_b32 a[6:7], v1 offset0:16 offset1:255 ; encoding: [0x10,0xff,0x70,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b32 a[6:7], v1 offset0:16 offset1:255 // GFX90A: ds_read2st64_b32 a[6:7], v1 offset0:127 ; encoding: [0x7f,0x00,0x70,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b32 a[6:7], v1 offset0:127 // GFX90A: ds_read2st64_b32 a[6:7], v1 offset0:127 ; encoding: [0x7f,0x00,0x70,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b32 a[6:7], v1 offset0:127 // GFX90A: ds_read2st64_b32 a[6:7], v1 offset0:127 offset1:1 ; encoding: [0x7f,0x01,0x70,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b32 a[6:7], v1 offset0:127 offset1:1 // GFX90A: ds_read_i8 a5, v1 offset:65535 ; encoding: [0xff,0xff,0x72,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8 a5, v1 offset:65535 // GFX90A: ds_read_i8 a255, v1 offset:65535 ; encoding: [0xff,0xff,0x72,0xda,0x01,0x00,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8 a255, v1 offset:65535 // GFX90A: ds_read_i8 a5, v255 offset:65535 ; encoding: [0xff,0xff,0x72,0xda,0xff,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8 a5, v255 offset:65535 // GFX90A: ds_read_i8 a5, v1 ; encoding: [0x00,0x00,0x72,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8 a5, v1 // GFX90A: ds_read_i8 a5, v1 ; encoding: [0x00,0x00,0x72,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8 a5, v1 // GFX90A: ds_read_i8 a5, v1 offset:4 ; encoding: [0x04,0x00,0x72,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8 a5, v1 offset:4 // GFX90A: ds_read_u8 a5, v1 offset:65535 ; encoding: [0xff,0xff,0x74,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8 a5, v1 offset:65535 // GFX90A: ds_read_u8 a255, v1 offset:65535 ; encoding: [0xff,0xff,0x74,0xda,0x01,0x00,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8 a255, v1 offset:65535 // GFX90A: ds_read_u8 a5, v255 offset:65535 ; encoding: [0xff,0xff,0x74,0xda,0xff,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8 a5, v255 offset:65535 // GFX90A: ds_read_u8 a5, v1 ; encoding: [0x00,0x00,0x74,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8 a5, v1 // GFX90A: ds_read_u8 a5, v1 ; encoding: [0x00,0x00,0x74,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8 a5, v1 // GFX90A: ds_read_u8 a5, v1 offset:4 ; encoding: [0x04,0x00,0x74,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8 a5, v1 offset:4 // GFX90A: ds_read_i16 a5, v1 offset:65535 ; encoding: [0xff,0xff,0x76,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i16 a5, v1 offset:65535 // GFX90A: ds_read_i16 a255, v1 offset:65535 ; encoding: [0xff,0xff,0x76,0xda,0x01,0x00,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i16 a255, v1 offset:65535 // GFX90A: ds_read_i16 a5, v255 offset:65535 ; encoding: [0xff,0xff,0x76,0xda,0xff,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i16 a5, v255 offset:65535 // GFX90A: ds_read_i16 a5, v1 ; encoding: [0x00,0x00,0x76,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i16 a5, v1 // GFX90A: ds_read_i16 a5, v1 ; encoding: [0x00,0x00,0x76,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i16 a5, v1 // GFX90A: ds_read_i16 a5, v1 offset:4 ; encoding: [0x04,0x00,0x76,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i16 a5, v1 offset:4 // GFX90A: ds_read_u16 a5, v1 offset:65535 ; encoding: [0xff,0xff,0x78,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16 a5, v1 offset:65535 // GFX90A: ds_read_u16 a255, v1 offset:65535 ; encoding: [0xff,0xff,0x78,0xda,0x01,0x00,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16 a255, v1 offset:65535 // GFX90A: ds_read_u16 a5, v255 offset:65535 ; encoding: [0xff,0xff,0x78,0xda,0xff,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16 a5, v255 offset:65535 // GFX90A: ds_read_u16 a5, v1 ; encoding: [0x00,0x00,0x78,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16 a5, v1 // GFX90A: ds_read_u16 a5, v1 ; encoding: [0x00,0x00,0x78,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16 a5, v1 // GFX90A: ds_read_u16 a5, v1 offset:4 ; encoding: [0x04,0x00,0x78,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16 a5, v1 offset:4 // GFX90A: ds_swizzle_b32 a5, v1 offset:swizzle(FFT,31) ; encoding: [0xff,0xff,0x7a,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_swizzle_b32 a5, v1 offset:65535 // GFX90A: ds_swizzle_b32 a255, v1 offset:swizzle(FFT,31) ; encoding: [0xff,0xff,0x7a,0xda,0x01,0x00,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_swizzle_b32 a255, v1 offset:65535 // GFX90A: ds_swizzle_b32 a5, v255 offset:swizzle(FFT,31) ; encoding: [0xff,0xff,0x7a,0xda,0xff,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_swizzle_b32 a5, v255 offset:65535 // GFX90A: ds_swizzle_b32 a5, v1 ; encoding: [0x00,0x00,0x7a,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_swizzle_b32 a5, v1 // GFX90A: ds_swizzle_b32 a5, v1 ; encoding: [0x00,0x00,0x7a,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_swizzle_b32 a5, v1 // GFX90A: ds_swizzle_b32 a5, v1 offset:swizzle(BITMASK_PERM,"00p00") ; encoding: [0x04,0x00,0x7a,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_swizzle_b32 a5, v1 offset:swizzle(BITMASK_PERM,"00p00") // GFX90A: ds_permute_b32 a5, v1, a2 offset:65535 ; encoding: [0xff,0xff,0x7c,0xda,0x01,0x02,0x00,0x05] @@ -8338,291 +8338,291 @@ ds_bpermute_b32 a5, v1, a2 ds_bpermute_b32 a5, v1, a2 offset:4 // GFX90A: ds_add_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x80,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_u64 v1, a[2:3] offset:65535 // GFX90A: ds_add_u64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x80,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_u64 v255, a[2:3] offset:65535 // GFX90A: ds_add_u64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x80,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_u64 v1, a[254:255] offset:65535 // GFX90A: ds_add_u64 v1, a[2:3] ; encoding: [0x00,0x00,0x80,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_u64 v1, a[2:3] // GFX90A: ds_add_u64 v1, a[2:3] ; encoding: [0x00,0x00,0x80,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_u64 v1, a[2:3] // GFX90A: ds_add_u64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x80,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_add_u64 v1, a[2:3] offset:4 // GFX90A: ds_sub_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x82,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_sub_u64 v1, a[2:3] offset:65535 // GFX90A: ds_sub_u64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x82,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_sub_u64 v255, a[2:3] offset:65535 // GFX90A: ds_sub_u64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x82,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_sub_u64 v1, a[254:255] offset:65535 // GFX90A: ds_sub_u64 v1, a[2:3] ; encoding: [0x00,0x00,0x82,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_sub_u64 v1, a[2:3] // GFX90A: ds_sub_u64 v1, a[2:3] ; encoding: [0x00,0x00,0x82,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_sub_u64 v1, a[2:3] // GFX90A: ds_sub_u64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x82,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_sub_u64 v1, a[2:3] offset:4 // GFX90A: ds_rsub_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x84,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_rsub_u64 v1, a[2:3] offset:65535 // GFX90A: ds_rsub_u64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x84,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_rsub_u64 v255, a[2:3] offset:65535 // GFX90A: ds_rsub_u64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x84,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_rsub_u64 v1, a[254:255] offset:65535 // GFX90A: ds_rsub_u64 v1, a[2:3] ; encoding: [0x00,0x00,0x84,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_rsub_u64 v1, a[2:3] // GFX90A: ds_rsub_u64 v1, a[2:3] ; encoding: [0x00,0x00,0x84,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_rsub_u64 v1, a[2:3] // GFX90A: ds_rsub_u64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x84,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_rsub_u64 v1, a[2:3] offset:4 // GFX90A: ds_inc_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x86,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_inc_u64 v1, a[2:3] offset:65535 // GFX90A: ds_inc_u64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x86,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_inc_u64 v255, a[2:3] offset:65535 // GFX90A: ds_inc_u64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x86,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_inc_u64 v1, a[254:255] offset:65535 // GFX90A: ds_inc_u64 v1, a[2:3] ; encoding: [0x00,0x00,0x86,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_inc_u64 v1, a[2:3] // GFX90A: ds_inc_u64 v1, a[2:3] ; encoding: [0x00,0x00,0x86,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_inc_u64 v1, a[2:3] // GFX90A: ds_inc_u64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x86,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_inc_u64 v1, a[2:3] offset:4 // GFX90A: ds_dec_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x88,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_dec_u64 v1, a[2:3] offset:65535 // GFX90A: ds_dec_u64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x88,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_dec_u64 v255, a[2:3] offset:65535 // GFX90A: ds_dec_u64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x88,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_dec_u64 v1, a[254:255] offset:65535 // GFX90A: ds_dec_u64 v1, a[2:3] ; encoding: [0x00,0x00,0x88,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_dec_u64 v1, a[2:3] // GFX90A: ds_dec_u64 v1, a[2:3] ; encoding: [0x00,0x00,0x88,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_dec_u64 v1, a[2:3] // GFX90A: ds_dec_u64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x88,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_dec_u64 v1, a[2:3] offset:4 // GFX90A: ds_min_i64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x8a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_i64 v1, a[2:3] offset:65535 // GFX90A: ds_min_i64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x8a,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_i64 v255, a[2:3] offset:65535 // GFX90A: ds_min_i64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x8a,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_i64 v1, a[254:255] offset:65535 // GFX90A: ds_min_i64 v1, a[2:3] ; encoding: [0x00,0x00,0x8a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_i64 v1, a[2:3] // GFX90A: ds_min_i64 v1, a[2:3] ; encoding: [0x00,0x00,0x8a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_i64 v1, a[2:3] // GFX90A: ds_min_i64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x8a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_i64 v1, a[2:3] offset:4 // GFX90A: ds_max_i64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x8c,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_i64 v1, a[2:3] offset:65535 // GFX90A: ds_max_i64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x8c,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_i64 v255, a[2:3] offset:65535 // GFX90A: ds_max_i64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x8c,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_i64 v1, a[254:255] offset:65535 // GFX90A: ds_max_i64 v1, a[2:3] ; encoding: [0x00,0x00,0x8c,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_i64 v1, a[2:3] // GFX90A: ds_max_i64 v1, a[2:3] ; encoding: [0x00,0x00,0x8c,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_i64 v1, a[2:3] // GFX90A: ds_max_i64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x8c,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_i64 v1, a[2:3] offset:4 // GFX90A: ds_min_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x8e,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_u64 v1, a[2:3] offset:65535 // GFX90A: ds_min_u64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x8e,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_u64 v255, a[2:3] offset:65535 // GFX90A: ds_min_u64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x8e,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_u64 v1, a[254:255] offset:65535 // GFX90A: ds_min_u64 v1, a[2:3] ; encoding: [0x00,0x00,0x8e,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_u64 v1, a[2:3] // GFX90A: ds_min_u64 v1, a[2:3] ; encoding: [0x00,0x00,0x8e,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_u64 v1, a[2:3] // GFX90A: ds_min_u64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x8e,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_u64 v1, a[2:3] offset:4 // GFX90A: ds_max_u64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x90,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_u64 v1, a[2:3] offset:65535 // GFX90A: ds_max_u64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x90,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_u64 v255, a[2:3] offset:65535 // GFX90A: ds_max_u64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x90,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_u64 v1, a[254:255] offset:65535 // GFX90A: ds_max_u64 v1, a[2:3] ; encoding: [0x00,0x00,0x90,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_u64 v1, a[2:3] // GFX90A: ds_max_u64 v1, a[2:3] ; encoding: [0x00,0x00,0x90,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_u64 v1, a[2:3] // GFX90A: ds_max_u64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x90,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_u64 v1, a[2:3] offset:4 // GFX90A: ds_and_b64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x92,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_and_b64 v1, a[2:3] offset:65535 // GFX90A: ds_and_b64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x92,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_and_b64 v255, a[2:3] offset:65535 // GFX90A: ds_and_b64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x92,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_and_b64 v1, a[254:255] offset:65535 // GFX90A: ds_and_b64 v1, a[2:3] ; encoding: [0x00,0x00,0x92,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_and_b64 v1, a[2:3] // GFX90A: ds_and_b64 v1, a[2:3] ; encoding: [0x00,0x00,0x92,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_and_b64 v1, a[2:3] // GFX90A: ds_and_b64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x92,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_and_b64 v1, a[2:3] offset:4 // GFX90A: ds_or_b64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x94,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_or_b64 v1, a[2:3] offset:65535 // GFX90A: ds_or_b64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x94,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_or_b64 v255, a[2:3] offset:65535 // GFX90A: ds_or_b64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x94,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_or_b64 v1, a[254:255] offset:65535 // GFX90A: ds_or_b64 v1, a[2:3] ; encoding: [0x00,0x00,0x94,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_or_b64 v1, a[2:3] // GFX90A: ds_or_b64 v1, a[2:3] ; encoding: [0x00,0x00,0x94,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_or_b64 v1, a[2:3] // GFX90A: ds_or_b64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x94,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_or_b64 v1, a[2:3] offset:4 // GFX90A: ds_xor_b64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x96,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_xor_b64 v1, a[2:3] offset:65535 // GFX90A: ds_xor_b64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x96,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_xor_b64 v255, a[2:3] offset:65535 // GFX90A: ds_xor_b64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x96,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_xor_b64 v1, a[254:255] offset:65535 // GFX90A: ds_xor_b64 v1, a[2:3] ; encoding: [0x00,0x00,0x96,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_xor_b64 v1, a[2:3] // GFX90A: ds_xor_b64 v1, a[2:3] ; encoding: [0x00,0x00,0x96,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_xor_b64 v1, a[2:3] // GFX90A: ds_xor_b64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x96,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_xor_b64 v1, a[2:3] offset:4 // GFX90A: ds_mskor_b64 v1, a[2:3], a[4:5] offset:65535 ; encoding: [0xff,0xff,0x98,0xda,0x01,0x02,0x04,0x00] @@ -8654,27 +8654,27 @@ ds_mskor_b64 v1, a[2:3], a[4:5] ds_mskor_b64 v1, a[2:3], a[4:5] offset:4 // GFX90A: ds_write_b64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x9a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b64 v1, a[2:3] offset:65535 // GFX90A: ds_write_b64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0x9a,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b64 v255, a[2:3] offset:65535 // GFX90A: ds_write_b64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0x9a,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b64 v1, a[254:255] offset:65535 // GFX90A: ds_write_b64 v1, a[2:3] ; encoding: [0x00,0x00,0x9a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b64 v1, a[2:3] // GFX90A: ds_write_b64 v1, a[2:3] ; encoding: [0x00,0x00,0x9a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b64 v1, a[2:3] // GFX90A: ds_write_b64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0x9a,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b64 v1, a[2:3] offset:4 // GFX90A: ds_write2_b64 v1, a[2:3], a[4:5] offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x9c,0xda,0x01,0x02,0x04,0x00] @@ -8814,243 +8814,243 @@ ds_cmpst_f64 v1, a[2:3], a[4:5] ds_cmpst_f64 v1, a[2:3], a[4:5] offset:4 // GFX90A: ds_min_f64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0xa4,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_f64 v1, a[2:3] offset:65535 // GFX90A: ds_min_f64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0xa4,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_f64 v255, a[2:3] offset:65535 // GFX90A: ds_min_f64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0xa4,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_f64 v1, a[254:255] offset:65535 // GFX90A: ds_min_f64 v1, a[2:3] ; encoding: [0x00,0x00,0xa4,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_f64 v1, a[2:3] // GFX90A: ds_min_f64 v1, a[2:3] ; encoding: [0x00,0x00,0xa4,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_f64 v1, a[2:3] // GFX90A: ds_min_f64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0xa4,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_min_f64 v1, a[2:3] offset:4 // GFX90A: ds_max_f64 v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0xa6,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_f64 v1, a[2:3] offset:65535 // GFX90A: ds_max_f64 v255, a[2:3] offset:65535 ; encoding: [0xff,0xff,0xa6,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_f64 v255, a[2:3] offset:65535 // GFX90A: ds_max_f64 v1, a[254:255] offset:65535 ; encoding: [0xff,0xff,0xa6,0xda,0x01,0xfe,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_f64 v1, a[254:255] offset:65535 // GFX90A: ds_max_f64 v1, a[2:3] ; encoding: [0x00,0x00,0xa6,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_f64 v1, a[2:3] // GFX90A: ds_max_f64 v1, a[2:3] ; encoding: [0x00,0x00,0xa6,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_f64 v1, a[2:3] // GFX90A: ds_max_f64 v1, a[2:3] offset:4 ; encoding: [0x04,0x00,0xa6,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_max_f64 v1, a[2:3] offset:4 // GFX90A: ds_write_b8_d16_hi v1, a2 offset:65535 ; encoding: [0xff,0xff,0xa8,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b8_d16_hi v1, a2 offset:65535 // GFX90A: ds_write_b8_d16_hi v255, a2 offset:65535 ; encoding: [0xff,0xff,0xa8,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b8_d16_hi v255, a2 offset:65535 // GFX90A: ds_write_b8_d16_hi v1, a255 offset:65535 ; encoding: [0xff,0xff,0xa8,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b8_d16_hi v1, a255 offset:65535 // GFX90A: ds_write_b8_d16_hi v1, a2 ; encoding: [0x00,0x00,0xa8,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b8_d16_hi v1, a2 // GFX90A: ds_write_b8_d16_hi v1, a2 ; encoding: [0x00,0x00,0xa8,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b8_d16_hi v1, a2 // GFX90A: ds_write_b8_d16_hi v1, a2 offset:4 ; encoding: [0x04,0x00,0xa8,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b8_d16_hi v1, a2 offset:4 // GFX90A: ds_write_b16_d16_hi v1, a2 offset:65535 ; encoding: [0xff,0xff,0xaa,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b16_d16_hi v1, a2 offset:65535 // GFX90A: ds_write_b16_d16_hi v255, a2 offset:65535 ; encoding: [0xff,0xff,0xaa,0xda,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b16_d16_hi v255, a2 offset:65535 // GFX90A: ds_write_b16_d16_hi v1, a255 offset:65535 ; encoding: [0xff,0xff,0xaa,0xda,0x01,0xff,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b16_d16_hi v1, a255 offset:65535 // GFX90A: ds_write_b16_d16_hi v1, a2 ; encoding: [0x00,0x00,0xaa,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b16_d16_hi v1, a2 // GFX90A: ds_write_b16_d16_hi v1, a2 ; encoding: [0x00,0x00,0xaa,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b16_d16_hi v1, a2 // GFX90A: ds_write_b16_d16_hi v1, a2 offset:4 ; encoding: [0x04,0x00,0xaa,0xda,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b16_d16_hi v1, a2 offset:4 // GFX90A: ds_read_u8_d16 a5, v1 offset:65535 ; encoding: [0xff,0xff,0xac,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8_d16 a5, v1 offset:65535 // GFX90A: ds_read_u8_d16 a255, v1 offset:65535 ; encoding: [0xff,0xff,0xac,0xda,0x01,0x00,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8_d16 a255, v1 offset:65535 // GFX90A: ds_read_u8_d16 a5, v255 offset:65535 ; encoding: [0xff,0xff,0xac,0xda,0xff,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8_d16 a5, v255 offset:65535 // GFX90A: ds_read_u8_d16 a5, v1 ; encoding: [0x00,0x00,0xac,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8_d16 a5, v1 // GFX90A: ds_read_u8_d16 a5, v1 ; encoding: [0x00,0x00,0xac,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8_d16 a5, v1 // GFX90A: ds_read_u8_d16 a5, v1 offset:4 ; encoding: [0x04,0x00,0xac,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8_d16 a5, v1 offset:4 // GFX90A: ds_read_u8_d16_hi a5, v1 offset:65535 ; encoding: [0xff,0xff,0xae,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8_d16_hi a5, v1 offset:65535 // GFX90A: ds_read_u8_d16_hi a255, v1 offset:65535 ; encoding: [0xff,0xff,0xae,0xda,0x01,0x00,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8_d16_hi a255, v1 offset:65535 // GFX90A: ds_read_u8_d16_hi a5, v255 offset:65535 ; encoding: [0xff,0xff,0xae,0xda,0xff,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8_d16_hi a5, v255 offset:65535 // GFX90A: ds_read_u8_d16_hi a5, v1 ; encoding: [0x00,0x00,0xae,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8_d16_hi a5, v1 // GFX90A: ds_read_u8_d16_hi a5, v1 ; encoding: [0x00,0x00,0xae,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8_d16_hi a5, v1 // GFX90A: ds_read_u8_d16_hi a5, v1 offset:4 ; encoding: [0x04,0x00,0xae,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u8_d16_hi a5, v1 offset:4 // GFX90A: ds_read_i8_d16 a5, v1 offset:65535 ; encoding: [0xff,0xff,0xb0,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8_d16 a5, v1 offset:65535 // GFX90A: ds_read_i8_d16 a255, v1 offset:65535 ; encoding: [0xff,0xff,0xb0,0xda,0x01,0x00,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8_d16 a255, v1 offset:65535 // GFX90A: ds_read_i8_d16 a5, v255 offset:65535 ; encoding: [0xff,0xff,0xb0,0xda,0xff,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8_d16 a5, v255 offset:65535 // GFX90A: ds_read_i8_d16 a5, v1 ; encoding: [0x00,0x00,0xb0,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8_d16 a5, v1 // GFX90A: ds_read_i8_d16 a5, v1 ; encoding: [0x00,0x00,0xb0,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8_d16 a5, v1 // GFX90A: ds_read_i8_d16 a5, v1 offset:4 ; encoding: [0x04,0x00,0xb0,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8_d16 a5, v1 offset:4 // GFX90A: ds_read_i8_d16_hi a5, v1 offset:65535 ; encoding: [0xff,0xff,0xb2,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8_d16_hi a5, v1 offset:65535 // GFX90A: ds_read_i8_d16_hi a255, v1 offset:65535 ; encoding: [0xff,0xff,0xb2,0xda,0x01,0x00,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8_d16_hi a255, v1 offset:65535 // GFX90A: ds_read_i8_d16_hi a5, v255 offset:65535 ; encoding: [0xff,0xff,0xb2,0xda,0xff,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8_d16_hi a5, v255 offset:65535 // GFX90A: ds_read_i8_d16_hi a5, v1 ; encoding: [0x00,0x00,0xb2,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8_d16_hi a5, v1 // GFX90A: ds_read_i8_d16_hi a5, v1 ; encoding: [0x00,0x00,0xb2,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8_d16_hi a5, v1 // GFX90A: ds_read_i8_d16_hi a5, v1 offset:4 ; encoding: [0x04,0x00,0xb2,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_i8_d16_hi a5, v1 offset:4 // GFX90A: ds_read_u16_d16 a5, v1 offset:65535 ; encoding: [0xff,0xff,0xb4,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16_d16 a5, v1 offset:65535 // GFX90A: ds_read_u16_d16 a255, v1 offset:65535 ; encoding: [0xff,0xff,0xb4,0xda,0x01,0x00,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16_d16 a255, v1 offset:65535 // GFX90A: ds_read_u16_d16 a5, v255 offset:65535 ; encoding: [0xff,0xff,0xb4,0xda,0xff,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16_d16 a5, v255 offset:65535 // GFX90A: ds_read_u16_d16 a5, v1 ; encoding: [0x00,0x00,0xb4,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16_d16 a5, v1 // GFX90A: ds_read_u16_d16 a5, v1 ; encoding: [0x00,0x00,0xb4,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16_d16 a5, v1 // GFX90A: ds_read_u16_d16 a5, v1 offset:4 ; encoding: [0x04,0x00,0xb4,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16_d16 a5, v1 offset:4 // GFX90A: ds_read_u16_d16_hi a5, v1 offset:65535 ; encoding: [0xff,0xff,0xb6,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16_d16_hi a5, v1 offset:65535 // GFX90A: ds_read_u16_d16_hi a255, v1 offset:65535 ; encoding: [0xff,0xff,0xb6,0xda,0x01,0x00,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16_d16_hi a255, v1 offset:65535 // GFX90A: ds_read_u16_d16_hi a5, v255 offset:65535 ; encoding: [0xff,0xff,0xb6,0xda,0xff,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16_d16_hi a5, v255 offset:65535 // GFX90A: ds_read_u16_d16_hi a5, v1 ; encoding: [0x00,0x00,0xb6,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16_d16_hi a5, v1 // GFX90A: ds_read_u16_d16_hi a5, v1 ; encoding: [0x00,0x00,0xb6,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16_d16_hi a5, v1 // GFX90A: ds_read_u16_d16_hi a5, v1 offset:4 ; encoding: [0x04,0x00,0xb6,0xda,0x01,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_u16_d16_hi a5, v1 offset:4 // GFX90A: ds_add_rtn_u64 a[6:7], v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0xc0,0xda,0x01,0x02,0x00,0x06] @@ -9658,99 +9658,99 @@ ds_max_rtn_f64 a[6:7], v1, a[2:3] ds_max_rtn_f64 a[6:7], v1, a[2:3] offset:4 // GFX90A: ds_read_b64 a[6:7], v1 offset:65535 ; encoding: [0xff,0xff,0xec,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b64 a[6:7], v1 offset:65535 // GFX90A: ds_read_b64 a[254:255], v1 offset:65535 ; encoding: [0xff,0xff,0xec,0xda,0x01,0x00,0x00,0xfe] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b64 a[254:255], v1 offset:65535 // GFX90A: ds_read_b64 a[6:7], v255 offset:65535 ; encoding: [0xff,0xff,0xec,0xda,0xff,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b64 a[6:7], v255 offset:65535 // GFX90A: ds_read_b64 a[6:7], v1 ; encoding: [0x00,0x00,0xec,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b64 a[6:7], v1 // GFX90A: ds_read_b64 a[6:7], v1 ; encoding: [0x00,0x00,0xec,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b64 a[6:7], v1 // GFX90A: ds_read_b64 a[6:7], v1 offset:4 ; encoding: [0x04,0x00,0xec,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b64 a[6:7], v1 offset:4 // GFX90A: ds_read2_b64 a[6:9], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xee,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b64 a[6:9], v1 offset0:127 offset1:255 // GFX90A: ds_read2_b64 a[252:255], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xee,0xda,0x01,0x00,0x00,0xfc] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b64 a[252:255], v1 offset0:127 offset1:255 // GFX90A: ds_read2_b64 a[6:9], v255 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xee,0xda,0xff,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b64 a[6:9], v255 offset0:127 offset1:255 // GFX90A: ds_read2_b64 a[6:9], v1 offset1:255 ; encoding: [0x00,0xff,0xee,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b64 a[6:9], v1 offset1:255 // GFX90A: ds_read2_b64 a[6:9], v1 offset1:255 ; encoding: [0x00,0xff,0xee,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b64 a[6:9], v1 offset1:255 // GFX90A: ds_read2_b64 a[6:9], v1 offset0:16 offset1:255 ; encoding: [0x10,0xff,0xee,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b64 a[6:9], v1 offset0:16 offset1:255 // GFX90A: ds_read2_b64 a[6:9], v1 offset0:127 ; encoding: [0x7f,0x00,0xee,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b64 a[6:9], v1 offset0:127 // GFX90A: ds_read2_b64 a[6:9], v1 offset0:127 ; encoding: [0x7f,0x00,0xee,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b64 a[6:9], v1 offset0:127 // GFX90A: ds_read2_b64 a[6:9], v1 offset0:127 offset1:1 ; encoding: [0x7f,0x01,0xee,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2_b64 a[6:9], v1 offset0:127 offset1:1 // GFX90A: ds_read2st64_b64 a[6:9], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xf0,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b64 a[6:9], v1 offset0:127 offset1:255 // GFX90A: ds_read2st64_b64 a[252:255], v1 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xf0,0xda,0x01,0x00,0x00,0xfc] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b64 a[252:255], v1 offset0:127 offset1:255 // GFX90A: ds_read2st64_b64 a[6:9], v255 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xf0,0xda,0xff,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b64 a[6:9], v255 offset0:127 offset1:255 // GFX90A: ds_read2st64_b64 a[6:9], v1 offset1:255 ; encoding: [0x00,0xff,0xf0,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b64 a[6:9], v1 offset1:255 // GFX90A: ds_read2st64_b64 a[6:9], v1 offset1:255 ; encoding: [0x00,0xff,0xf0,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b64 a[6:9], v1 offset1:255 // GFX90A: ds_read2st64_b64 a[6:9], v1 offset0:16 offset1:255 ; encoding: [0x10,0xff,0xf0,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b64 a[6:9], v1 offset0:16 offset1:255 // GFX90A: ds_read2st64_b64 a[6:9], v1 offset0:127 ; encoding: [0x7f,0x00,0xf0,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b64 a[6:9], v1 offset0:127 // GFX90A: ds_read2st64_b64 a[6:9], v1 offset0:127 ; encoding: [0x7f,0x00,0xf0,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b64 a[6:9], v1 offset0:127 // GFX90A: ds_read2st64_b64 a[6:9], v1 offset0:127 offset1:1 ; encoding: [0x7f,0x01,0xf0,0xda,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read2st64_b64 a[6:9], v1 offset0:127 offset1:1 // GFX90A: ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:65535 ; encoding: [0xff,0xff,0xfc,0xda,0x01,0x02,0x00,0x06] @@ -9782,921 +9782,921 @@ ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] ds_condxchg32_rtn_b64 a[6:7], v1, a[2:3] offset:4 // GFX90A: ds_gws_init a0 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0x00,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_init a0 offset:65535 gds // GFX90A: ds_gws_init a254 offset:65535 gds ; encoding: [0xff,0xff,0x33,0xdb,0xfe,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_init a254 offset:65535 gds // GFX90A: ds_gws_init a2 gds ; encoding: [0x00,0x00,0x33,0xdb,0x02,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_init a2 gds // GFX90A: ds_gws_init a0 gds ; encoding: [0x00,0x00,0x33,0xdb,0x00,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_init a0 gds // GFX90A: ds_gws_init a0 offset:4 gds ; encoding: [0x04,0x00,0x33,0xdb,0x00,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_init a0 offset:4 gds // GFX90A: ds_gws_sema_br a2 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0x02,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_sema_br a2 offset:65535 gds // GFX90A: ds_gws_sema_br a254 offset:65535 gds ; encoding: [0xff,0xff,0x37,0xdb,0xfe,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_sema_br a254 offset:65535 gds // GFX90A: ds_gws_sema_br a0 gds ; encoding: [0x00,0x00,0x37,0xdb,0x00,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_sema_br a0 gds // GFX90A: ds_gws_sema_br a2 gds ; encoding: [0x00,0x00,0x37,0xdb,0x02,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_sema_br a2 gds // GFX90A: ds_gws_sema_br a0 offset:4 gds ; encoding: [0x04,0x00,0x37,0xdb,0x00,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_sema_br a0 offset:4 gds // GFX90A: ds_gws_barrier a2 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0x02,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_barrier a2 offset:65535 gds // GFX90A: ds_gws_barrier a254 offset:65535 gds ; encoding: [0xff,0xff,0x3b,0xdb,0xfe,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_barrier a254 offset:65535 gds // GFX90A: ds_gws_barrier a0 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_barrier a0 gds // GFX90A: ds_gws_barrier a2 gds ; encoding: [0x00,0x00,0x3b,0xdb,0x02,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_barrier a2 gds // GFX90A: ds_gws_barrier a0 offset:4 gds ; encoding: [0x04,0x00,0x3b,0xdb,0x00,0x00,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_gws_barrier a0 offset:4 gds // GFX90A: ds_consume a5 offset:65535 ; encoding: [0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_consume a5 offset:65535 // GFX90A: ds_consume a255 offset:65535 ; encoding: [0xff,0xff,0x7a,0xdb,0x00,0x00,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_consume a255 offset:65535 // GFX90A: ds_consume a5 ; encoding: [0x00,0x00,0x7a,0xdb,0x00,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_consume a5 // GFX90A: ds_consume a5 ; encoding: [0x00,0x00,0x7a,0xdb,0x00,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_consume a5 // GFX90A: ds_consume a5 offset:4 ; encoding: [0x04,0x00,0x7a,0xdb,0x00,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_consume a5 offset:4 // GFX90A: ds_append a5 offset:65535 ; encoding: [0xff,0xff,0x7c,0xdb,0x00,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_append a5 offset:65535 // GFX90A: ds_append a255 offset:65535 ; encoding: [0xff,0xff,0x7c,0xdb,0x00,0x00,0x00,0xff] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_append a255 offset:65535 // GFX90A: ds_append a5 ; encoding: [0x00,0x00,0x7c,0xdb,0x00,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_append a5 // GFX90A: ds_append a5 ; encoding: [0x00,0x00,0x7c,0xdb,0x00,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_append a5 // GFX90A: ds_append a5 offset:4 ; encoding: [0x04,0x00,0x7c,0xdb,0x00,0x00,0x00,0x05] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_append a5 offset:4 // GFX90A: ds_write_b96 v1, a[2:4] offset:65535 ; encoding: [0xff,0xff,0xbc,0xdb,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b96 v1, a[2:4] offset:65535 // GFX90A: ds_write_b96 v255, a[2:4] offset:65535 ; encoding: [0xff,0xff,0xbc,0xdb,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b96 v255, a[2:4] offset:65535 // GFX90A: ds_write_b96 v1, a[252:254] offset:65535 ; encoding: [0xff,0xff,0xbc,0xdb,0x01,0xfc,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b96 v1, a[252:254] offset:65535 // GFX90A: ds_write_b96 v1, a[2:4] ; encoding: [0x00,0x00,0xbc,0xdb,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b96 v1, a[2:4] // GFX90A: ds_write_b96 v1, a[2:4] ; encoding: [0x00,0x00,0xbc,0xdb,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b96 v1, a[2:4] // GFX90A: ds_write_b96 v1, a[2:4] offset:4 ; encoding: [0x04,0x00,0xbc,0xdb,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b96 v1, a[2:4] offset:4 // GFX90A: ds_write_b128 v1, a[2:5] offset:65535 ; encoding: [0xff,0xff,0xbe,0xdb,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b128 v1, a[2:5] offset:65535 // GFX90A: ds_write_b128 v255, a[2:5] offset:65535 ; encoding: [0xff,0xff,0xbe,0xdb,0xff,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b128 v255, a[2:5] offset:65535 // GFX90A: ds_write_b128 v1, a[252:255] offset:65535 ; encoding: [0xff,0xff,0xbe,0xdb,0x01,0xfc,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b128 v1, a[252:255] offset:65535 // GFX90A: ds_write_b128 v1, a[2:5] ; encoding: [0x00,0x00,0xbe,0xdb,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b128 v1, a[2:5] // GFX90A: ds_write_b128 v1, a[2:5] ; encoding: [0x00,0x00,0xbe,0xdb,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b128 v1, a[2:5] // GFX90A: ds_write_b128 v1, a[2:5] offset:4 ; encoding: [0x04,0x00,0xbe,0xdb,0x01,0x02,0x00,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_write_b128 v1, a[2:5] offset:4 // GFX90A: ds_read_b96 a[6:8], v1 offset:65535 ; encoding: [0xff,0xff,0xfc,0xdb,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b96 a[6:8], v1 offset:65535 // GFX90A: ds_read_b96 a[252:254], v1 offset:65535 ; encoding: [0xff,0xff,0xfc,0xdb,0x01,0x00,0x00,0xfc] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b96 a[252:254], v1 offset:65535 // GFX90A: ds_read_b96 a[6:8], v255 offset:65535 ; encoding: [0xff,0xff,0xfc,0xdb,0xff,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b96 a[6:8], v255 offset:65535 // GFX90A: ds_read_b96 a[6:8], v1 ; encoding: [0x00,0x00,0xfc,0xdb,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b96 a[6:8], v1 // GFX90A: ds_read_b96 a[6:8], v1 ; encoding: [0x00,0x00,0xfc,0xdb,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b96 a[6:8], v1 // GFX90A: ds_read_b96 a[6:8], v1 offset:4 ; encoding: [0x04,0x00,0xfc,0xdb,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b96 a[6:8], v1 offset:4 // GFX90A: ds_read_b128 a[6:9], v1 offset:65535 ; encoding: [0xff,0xff,0xfe,0xdb,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b128 a[6:9], v1 offset:65535 // GFX90A: ds_read_b128 a[252:255], v1 offset:65535 ; encoding: [0xff,0xff,0xfe,0xdb,0x01,0x00,0x00,0xfc] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b128 a[252:255], v1 offset:65535 // GFX90A: ds_read_b128 a[6:9], v255 offset:65535 ; encoding: [0xff,0xff,0xfe,0xdb,0xff,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b128 a[6:9], v255 offset:65535 // GFX90A: ds_read_b128 a[6:9], v1 ; encoding: [0x00,0x00,0xfe,0xdb,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b128 a[6:9], v1 // GFX90A: ds_read_b128 a[6:9], v1 ; encoding: [0x00,0x00,0xfe,0xdb,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b128 a[6:9], v1 // GFX90A: ds_read_b128 a[6:9], v1 offset:4 ; encoding: [0x04,0x00,0xfe,0xdb,0x01,0x00,0x00,0x06] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction ds_read_b128 a[6:9], v1 offset:4 // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x01,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a5, v[2:5], s[8:15] dmask:0x1 // GFX90A: image_load a252, v[2:5], s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x01,0xf0,0x02,0xfc,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a252, v[2:5], s[8:15] dmask:0x1 // GFX90A: image_load a5, v[252:255], s[8:15] dmask:0x1 ; encoding: [0x00,0x01,0x01,0xf0,0xfc,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a5, v[252:255], s[8:15] dmask:0x1 // GFX90A: image_load a5, v[2:5], s[12:19] dmask:0x1 ; encoding: [0x00,0x01,0x01,0xf0,0x02,0x05,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a5, v[2:5], s[12:19] dmask:0x1 // GFX90A: image_load a5, v[2:5], s[92:99] dmask:0x1 ; encoding: [0x00,0x01,0x01,0xf0,0x02,0x05,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a5, v[2:5], s[92:99] dmask:0x1 // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x2 ; encoding: [0x00,0x02,0x01,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a5, v[2:5], s[8:15] dmask:0x2 // GFX90A: image_load a[6:7], v[2:5], s[8:15] dmask:0x3 ; encoding: [0x00,0x03,0x01,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a[6:7], v[2:5], s[8:15] dmask:0x3 // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x4 ; encoding: [0x00,0x04,0x01,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a5, v[2:5], s[8:15] dmask:0x4 // GFX90A: image_load a[6:7], v[2:5], s[8:15] dmask:0x5 ; encoding: [0x00,0x05,0x01,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a[6:7], v[2:5], s[8:15] dmask:0x5 // GFX90A: image_load a[6:7], v[2:5], s[8:15] dmask:0x6 ; encoding: [0x00,0x06,0x01,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a[6:7], v[2:5], s[8:15] dmask:0x6 // GFX90A: image_load a[6:8], v[2:5], s[8:15] dmask:0x7 ; encoding: [0x00,0x07,0x01,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a[6:8], v[2:5], s[8:15] dmask:0x7 // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x8 ; encoding: [0x00,0x08,0x01,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a5, v[2:5], s[8:15] dmask:0x8 // GFX90A: image_load a[6:7], v[2:5], s[8:15] dmask:0x9 ; encoding: [0x00,0x09,0x01,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a[6:7], v[2:5], s[8:15] dmask:0x9 // GFX90A: image_load a[6:7], v[2:5], s[8:15] dmask:0xa ; encoding: [0x00,0x0a,0x01,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a[6:7], v[2:5], s[8:15] dmask:0xa // GFX90A: image_load a[6:8], v[2:5], s[8:15] dmask:0xb ; encoding: [0x00,0x0b,0x01,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a[6:8], v[2:5], s[8:15] dmask:0xb // GFX90A: image_load a[6:7], v[2:5], s[8:15] dmask:0xc ; encoding: [0x00,0x0c,0x01,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a[6:7], v[2:5], s[8:15] dmask:0xc // GFX90A: image_load a[6:8], v[2:5], s[8:15] dmask:0xd ; encoding: [0x00,0x0d,0x01,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a[6:8], v[2:5], s[8:15] dmask:0xd // GFX90A: image_load a[6:8], v[2:5], s[8:15] dmask:0xe ; encoding: [0x00,0x0e,0x01,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a[6:8], v[2:5], s[8:15] dmask:0xe // GFX90A: image_load a5, v[2:5], s[8:15] ; encoding: [0x00,0x00,0x01,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a5, v[2:5], s[8:15] // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x01,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a5, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x1 glc ; encoding: [0x00,0x21,0x01,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a5, v[2:5], s[8:15] dmask:0x1 glc // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x1 slc ; encoding: [0x00,0x01,0x01,0xf2,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a5, v[2:5], s[8:15] dmask:0x1 slc // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x1 lwe ; encoding: [0x00,0x01,0x03,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a5, v[2:5], s[8:15] dmask:0x1 lwe // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x1 da ; encoding: [0x00,0x41,0x01,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a5, v[2:5], s[8:15] dmask:0x1 da // GFX90A: image_load a5, v[2:5], s[8:15] dmask:0x1 d16 ; encoding: [0x00,0x01,0x01,0xf0,0x02,0x05,0x02,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a5, v[2:5], s[8:15] dmask:0x1 d16 // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x21,0xf0,0x02,0x01,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a1, v[2:5], s[12:19] dmask:0x1 unorm // GFX90A: image_store a252, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x21,0xf0,0x02,0xfc,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a252, v[2:5], s[12:19] dmask:0x1 unorm // GFX90A: image_store a1, v[252:255], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x21,0xf0,0xfc,0x01,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a1, v[252:255], s[12:19] dmask:0x1 unorm // GFX90A: image_store a1, v[2:5], s[16:23] dmask:0x1 unorm ; encoding: [0x00,0x11,0x21,0xf0,0x02,0x01,0x04,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a1, v[2:5], s[16:23] dmask:0x1 unorm // GFX90A: image_store a1, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x21,0xf0,0x02,0x01,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a1, v[2:5], s[92:99] dmask:0x1 unorm // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x2 unorm ; encoding: [0x00,0x12,0x21,0xf0,0x02,0x01,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a1, v[2:5], s[12:19] dmask:0x2 unorm // GFX90A: image_store a[2:3], v[2:5], s[12:19] dmask:0x3 unorm ; encoding: [0x00,0x13,0x21,0xf0,0x02,0x02,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a[2:3], v[2:5], s[12:19] dmask:0x3 unorm // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x4 unorm ; encoding: [0x00,0x14,0x21,0xf0,0x02,0x01,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a1, v[2:5], s[12:19] dmask:0x4 unorm // GFX90A: image_store a[2:3], v[2:5], s[12:19] dmask:0x5 unorm ; encoding: [0x00,0x15,0x21,0xf0,0x02,0x02,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a[2:3], v[2:5], s[12:19] dmask:0x5 unorm // GFX90A: image_store a[2:3], v[2:5], s[12:19] dmask:0x6 unorm ; encoding: [0x00,0x16,0x21,0xf0,0x02,0x02,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a[2:3], v[2:5], s[12:19] dmask:0x6 unorm // GFX90A: image_store a[2:4], v[2:5], s[12:19] dmask:0x7 unorm ; encoding: [0x00,0x17,0x21,0xf0,0x02,0x02,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a[2:4], v[2:5], s[12:19] dmask:0x7 unorm // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x8 unorm ; encoding: [0x00,0x18,0x21,0xf0,0x02,0x01,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a1, v[2:5], s[12:19] dmask:0x8 unorm // GFX90A: image_store a[2:3], v[2:5], s[12:19] dmask:0x9 unorm ; encoding: [0x00,0x19,0x21,0xf0,0x02,0x02,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a[2:3], v[2:5], s[12:19] dmask:0x9 unorm // GFX90A: image_store a[2:3], v[2:5], s[12:19] dmask:0xa unorm ; encoding: [0x00,0x1a,0x21,0xf0,0x02,0x02,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a[2:3], v[2:5], s[12:19] dmask:0xa unorm // GFX90A: image_store a[2:4], v[2:5], s[12:19] dmask:0xb unorm ; encoding: [0x00,0x1b,0x21,0xf0,0x02,0x02,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a[2:4], v[2:5], s[12:19] dmask:0xb unorm // GFX90A: image_store a[2:3], v[2:5], s[12:19] dmask:0xc unorm ; encoding: [0x00,0x1c,0x21,0xf0,0x02,0x02,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a[2:3], v[2:5], s[12:19] dmask:0xc unorm // GFX90A: image_store a[2:4], v[2:5], s[12:19] dmask:0xd unorm ; encoding: [0x00,0x1d,0x21,0xf0,0x02,0x02,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a[2:4], v[2:5], s[12:19] dmask:0xd unorm // GFX90A: image_store a[2:4], v[2:5], s[12:19] dmask:0xe unorm ; encoding: [0x00,0x1e,0x21,0xf0,0x02,0x02,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a[2:4], v[2:5], s[12:19] dmask:0xe unorm // GFX90A: image_store a[2:5], v[2:5], s[12:19] dmask:0xf unorm ; encoding: [0x00,0x1f,0x21,0xf0,0x02,0x02,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a[2:5], v[2:5], s[12:19] dmask:0xf unorm // GFX90A: image_store a1, v[2:5], s[12:19] unorm ; encoding: [0x00,0x10,0x21,0xf0,0x02,0x01,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a1, v[2:5], s[12:19] unorm // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x21,0xf0,0x02,0x01,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a1, v[2:5], s[12:19] dmask:0x1 unorm glc // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x21,0xf2,0x02,0x01,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a1, v[2:5], s[12:19] dmask:0x1 unorm slc // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x23,0xf0,0x02,0x01,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a1, v[2:5], s[12:19] dmask:0x1 unorm lwe // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x21,0xf0,0x02,0x01,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a1, v[2:5], s[12:19] dmask:0x1 unorm da // GFX90A: image_store a1, v[2:5], s[12:19] dmask:0x1 unorm d16 ; encoding: [0x00,0x11,0x21,0xf0,0x02,0x01,0x03,0x80] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a1, v[2:5], s[12:19] dmask:0x1 unorm d16 // GFX90A: image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x41,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_swap a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x41,0xf0,0x02,0xfc,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_swap a252, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_swap a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x41,0xf0,0xfc,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_swap a5, v[252:255], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_swap a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x41,0xf0,0x02,0x05,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_swap a5, v[2:5], s[12:19] dmask:0x1 unorm // GFX90A: image_atomic_swap a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x41,0xf0,0x02,0x05,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_swap a5, v[2:5], s[92:99] dmask:0x1 unorm // GFX90A: image_atomic_swap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x41,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_swap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x41,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm glc // GFX90A: image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x41,0xf2,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm slc // GFX90A: image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x43,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm lwe // GFX90A: image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x41,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_swap a5, v[2:5], s[8:15] dmask:0x1 unorm da // GFX90A: image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x45,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_cmpswap a[252:253], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x45,0xf0,0x02,0xfc,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap a[252:253], v[2:5], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_cmpswap a[6:7], v[252:255], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x45,0xf0,0xfc,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap a[6:7], v[252:255], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_cmpswap a[6:7], v[2:5], s[12:19] dmask:0x3 unorm ; encoding: [0x00,0x13,0x45,0xf0,0x02,0x06,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap a[6:7], v[2:5], s[12:19] dmask:0x3 unorm // GFX90A: image_atomic_cmpswap a[6:7], v[2:5], s[92:99] dmask:0x3 unorm ; encoding: [0x00,0x13,0x45,0xf0,0x02,0x06,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap a[6:7], v[2:5], s[92:99] dmask:0x3 unorm // GFX90A: image_atomic_cmpswap a[6:9], v[2:5], s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x45,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap a[6:9], v[2:5], s[8:15] dmask:0xf unorm // GFX90A: image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm glc ; encoding: [0x00,0x33,0x45,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm glc // GFX90A: image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm slc ; encoding: [0x00,0x13,0x45,0xf2,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm slc // GFX90A: image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm lwe ; encoding: [0x00,0x13,0x47,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm lwe // GFX90A: image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm da ; encoding: [0x00,0x53,0x45,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap a[6:7], v[2:5], s[8:15] dmask:0x3 unorm da // GFX90A: image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x49,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_add a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x49,0xf0,0x02,0xfc,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_add a252, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_add a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x49,0xf0,0xfc,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_add a5, v[252:255], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_add a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x49,0xf0,0x02,0x05,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_add a5, v[2:5], s[12:19] dmask:0x1 unorm // GFX90A: image_atomic_add a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x49,0xf0,0x02,0x05,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_add a5, v[2:5], s[92:99] dmask:0x1 unorm // GFX90A: image_atomic_add a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x49,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_add a[6:7], v[2:5], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x49,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm glc // GFX90A: image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x49,0xf2,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm slc // GFX90A: image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x4b,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm lwe // GFX90A: image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x49,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_add a5, v[2:5], s[8:15] dmask:0x1 unorm da // GFX90A: image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x4d,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_sub a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x4d,0xf0,0x02,0xfc,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_sub a252, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_sub a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x4d,0xf0,0xfc,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_sub a5, v[252:255], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_sub a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x4d,0xf0,0x02,0x05,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_sub a5, v[2:5], s[12:19] dmask:0x1 unorm // GFX90A: image_atomic_sub a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x4d,0xf0,0x02,0x05,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_sub a5, v[2:5], s[92:99] dmask:0x1 unorm // GFX90A: image_atomic_sub a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x4d,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_sub a[6:7], v[2:5], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x4d,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm glc // GFX90A: image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x4d,0xf2,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm slc // GFX90A: image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x4f,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm lwe // GFX90A: image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x4d,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_sub a5, v[2:5], s[8:15] dmask:0x1 unorm da // GFX90A: image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x51,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_smin a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x51,0xf0,0x02,0xfc,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smin a252, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_smin a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x51,0xf0,0xfc,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smin a5, v[252:255], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_smin a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x51,0xf0,0x02,0x05,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smin a5, v[2:5], s[12:19] dmask:0x1 unorm // GFX90A: image_atomic_smin a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x51,0xf0,0x02,0x05,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smin a5, v[2:5], s[92:99] dmask:0x1 unorm // GFX90A: image_atomic_smin a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x51,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smin a[6:7], v[2:5], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x51,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm glc // GFX90A: image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x51,0xf2,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm slc // GFX90A: image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x53,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm lwe // GFX90A: image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x51,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smin a5, v[2:5], s[8:15] dmask:0x1 unorm da // GFX90A: image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x55,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_umin a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x55,0xf0,0x02,0xfc,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umin a252, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_umin a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x55,0xf0,0xfc,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umin a5, v[252:255], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_umin a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x55,0xf0,0x02,0x05,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umin a5, v[2:5], s[12:19] dmask:0x1 unorm // GFX90A: image_atomic_umin a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x55,0xf0,0x02,0x05,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umin a5, v[2:5], s[92:99] dmask:0x1 unorm // GFX90A: image_atomic_umin a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x55,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umin a[6:7], v[2:5], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x55,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm glc // GFX90A: image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x55,0xf2,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm slc // GFX90A: image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x57,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm lwe // GFX90A: image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x55,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umin a5, v[2:5], s[8:15] dmask:0x1 unorm da // GFX90A: image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x59,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_smax a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x59,0xf0,0x02,0xfc,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smax a252, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_smax a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x59,0xf0,0xfc,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smax a5, v[252:255], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_smax a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x59,0xf0,0x02,0x05,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smax a5, v[2:5], s[12:19] dmask:0x1 unorm // GFX90A: image_atomic_smax a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x59,0xf0,0x02,0x05,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smax a5, v[2:5], s[92:99] dmask:0x1 unorm // GFX90A: image_atomic_smax a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x59,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smax a[6:7], v[2:5], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x59,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm glc // GFX90A: image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x59,0xf2,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm slc // GFX90A: image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x5b,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm lwe // GFX90A: image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x59,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_smax a5, v[2:5], s[8:15] dmask:0x1 unorm da // GFX90A: image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x5d,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_umax a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x5d,0xf0,0x02,0xfc,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umax a252, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_umax a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x5d,0xf0,0xfc,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umax a5, v[252:255], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_umax a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x5d,0xf0,0x02,0x05,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umax a5, v[2:5], s[12:19] dmask:0x1 unorm // GFX90A: image_atomic_umax a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x5d,0xf0,0x02,0x05,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umax a5, v[2:5], s[92:99] dmask:0x1 unorm // GFX90A: image_atomic_umax a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x5d,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umax a[6:7], v[2:5], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x5d,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm glc // GFX90A: image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x5d,0xf2,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm slc // GFX90A: image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x5f,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm lwe // GFX90A: image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x5d,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_umax a5, v[2:5], s[8:15] dmask:0x1 unorm da // GFX90A: image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x61,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_and a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x61,0xf0,0x02,0xfc,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_and a252, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_and a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x61,0xf0,0xfc,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_and a5, v[252:255], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_and a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x61,0xf0,0x02,0x05,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_and a5, v[2:5], s[12:19] dmask:0x1 unorm // GFX90A: image_atomic_and a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x61,0xf0,0x02,0x05,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_and a5, v[2:5], s[92:99] dmask:0x1 unorm // GFX90A: image_atomic_and a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x61,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_and a[6:7], v[2:5], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x61,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm glc // GFX90A: image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x61,0xf2,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm slc // GFX90A: image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x63,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm lwe // GFX90A: image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x61,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_and a5, v[2:5], s[8:15] dmask:0x1 unorm da // GFX90A: image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x65,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_or a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x65,0xf0,0x02,0xfc,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_or a252, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_or a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x65,0xf0,0xfc,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_or a5, v[252:255], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_or a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x65,0xf0,0x02,0x05,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_or a5, v[2:5], s[12:19] dmask:0x1 unorm // GFX90A: image_atomic_or a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x65,0xf0,0x02,0x05,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_or a5, v[2:5], s[92:99] dmask:0x1 unorm // GFX90A: image_atomic_or a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x65,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_or a[6:7], v[2:5], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x65,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm glc // GFX90A: image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x65,0xf2,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm slc // GFX90A: image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x67,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm lwe // GFX90A: image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x65,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_or a5, v[2:5], s[8:15] dmask:0x1 unorm da // GFX90A: image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x69,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_xor a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x69,0xf0,0x02,0xfc,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_xor a252, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_xor a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x69,0xf0,0xfc,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_xor a5, v[252:255], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_xor a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x69,0xf0,0x02,0x05,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_xor a5, v[2:5], s[12:19] dmask:0x1 unorm // GFX90A: image_atomic_xor a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x69,0xf0,0x02,0x05,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_xor a5, v[2:5], s[92:99] dmask:0x1 unorm // GFX90A: image_atomic_xor a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x69,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_xor a[6:7], v[2:5], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x69,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm glc // GFX90A: image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x69,0xf2,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm slc // GFX90A: image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x6b,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm lwe // GFX90A: image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x69,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_xor a5, v[2:5], s[8:15] dmask:0x1 unorm da // GFX90A: image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x6d,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_inc a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x6d,0xf0,0x02,0xfc,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_inc a252, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_inc a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x6d,0xf0,0xfc,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_inc a5, v[252:255], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_inc a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x6d,0xf0,0x02,0x05,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_inc a5, v[2:5], s[12:19] dmask:0x1 unorm // GFX90A: image_atomic_inc a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x6d,0xf0,0x02,0x05,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_inc a5, v[2:5], s[92:99] dmask:0x1 unorm // GFX90A: image_atomic_inc a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x6d,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_inc a[6:7], v[2:5], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x6d,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm glc // GFX90A: image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x6d,0xf2,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm slc // GFX90A: image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x6f,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm lwe // GFX90A: image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x6d,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_inc a5, v[2:5], s[8:15] dmask:0x1 unorm da // GFX90A: image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x71,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_dec a252, v[2:5], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x71,0xf0,0x02,0xfc,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_dec a252, v[2:5], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_dec a5, v[252:255], s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x71,0xf0,0xfc,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_dec a5, v[252:255], s[8:15] dmask:0x1 unorm // GFX90A: image_atomic_dec a5, v[2:5], s[12:19] dmask:0x1 unorm ; encoding: [0x00,0x11,0x71,0xf0,0x02,0x05,0x03,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_dec a5, v[2:5], s[12:19] dmask:0x1 unorm // GFX90A: image_atomic_dec a5, v[2:5], s[92:99] dmask:0x1 unorm ; encoding: [0x00,0x11,0x71,0xf0,0x02,0x05,0x17,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_dec a5, v[2:5], s[92:99] dmask:0x1 unorm // GFX90A: image_atomic_dec a[6:7], v[2:5], s[8:15] dmask:0x3 unorm ; encoding: [0x00,0x13,0x71,0xf0,0x02,0x06,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_dec a[6:7], v[2:5], s[8:15] dmask:0x3 unorm // GFX90A: image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm glc ; encoding: [0x00,0x31,0x71,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm glc // GFX90A: image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm slc ; encoding: [0x00,0x11,0x71,0xf2,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm slc // GFX90A: image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm lwe ; encoding: [0x00,0x11,0x73,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm lwe // GFX90A: image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm da ; encoding: [0x00,0x51,0x71,0xf0,0x02,0x05,0x02,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_dec a5, v[2:5], s[8:15] dmask:0x1 unorm da // GFX90A: image_sample a5, v[0:3], s[8:15], s[12:15] dmask:0x1 ; encoding: [0x00,0x01,0x81,0xf0,0x00,0x05,0x62,0x00] -// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid register class: agpr loads and stores not supported on this GPU +// NOT-GFX90A: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction image_sample a5, v[0:3], s[8:15], s[12:15] dmask:0x1 diff --git a/llvm/test/MC/AMDGPU/gfx950-unsupported.s b/llvm/test/MC/AMDGPU/gfx950-unsupported.s index 8bdab2d..cea81b2 100644 --- a/llvm/test/MC/AMDGPU/gfx950-unsupported.s +++ b/llvm/test/MC/AMDGPU/gfx950-unsupported.s @@ -183,7 +183,7 @@ v_mfma_f32_16x16x8_xf32 v[0:3], a[0:3], a[0:3], v[4:7] // ds_read_b64_tr_b4 //===----------------------------------------------------------------------===// ds_read_b64_tr_b4 v[1:2], v0 -// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_read_b64_tr_b4 v1, v0 @@ -202,7 +202,7 @@ ds_read_b64_tr_b4 v[2:3], v2 offset:-64 //ds_read_b64_tr_b8 //===----------------------------------------------------------------------===// ds_read_b64_tr_b8 v[1:2], v0 -// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_read_b64_tr_b8 v1, v0 @@ -221,7 +221,7 @@ ds_read_b64_tr_b8 v[2:3], v2 offset:-64 // ds_read_b64_tr_b16 //===----------------------------------------------------------------------===// ds_read_b64_tr_b16 v[1:2], v0 -// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU ds_read_b64_tr_b16 v1, v0 diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s index 7839475..78aa8f2 100644 --- a/llvm/test/MC/AMDGPU/literals.s +++ b/llvm/test/MC/AMDGPU/literals.s @@ -1,935 +1,1849 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck %s --check-prefix=SICI // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefixes=SICI,CI -// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=GFX89 -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX89,GFX9 - -// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSI,NOSICI,NOSICIVI --implicit-check-not=error: -// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOCIVI,NOSICIVI --implicit-check-not=error: -// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICIVI,NOVI,NOGFX89 --implicit-check-not=error: -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX89,NOGFX9 --implicit-check-not=error: +// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX89 +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX89,GFX9 +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX11 +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX12XX,GFX12 +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX12XX,GFX1250 + +// RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOSI --implicit-check-not=error: +// RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOCI --implicit-check-not=error: +// RUN: not llvm-mc -triple=amdgcn -mcpu=tonga %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX89,NOVI --implicit-check-not=error: +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX89,NOGFX9 --implicit-check-not=error: +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX11 --implicit-check-not=error: +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX12 --implicit-check-not=error: +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 %s -mattr=+real-true16 -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOGFX8PLUS,NOGFX1250 --implicit-check-not=error: //---------------------------------------------------------------------------// // fp literal, expected fp operand //---------------------------------------------------------------------------// -// SICI: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x64,0x00,0x7e] v_fract_f64 v[0:1], 0.5 +// SICI: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x64,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e] +// GFX11: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e] -// SICI: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x68,0x00,0x7e] -// GFX89: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x50,0x00,0x7e] v_sqrt_f64 v[0:1], -4.0 +// SICI: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x68,0x00,0x7e] +// GFX89: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x50,0x00,0x7e] +// GFX12XX: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x68,0x00,0x7e] +// GFX11: v_sqrt_f64_e32 v[0:1], -4.0 ; encoding: [0xf7,0x68,0x00,0x7e] -// SICI: v_log_clamp_f32_e32 v1, 0.5 ; encoding: [0xf0,0x4c,0x02,0x7e] -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: instruction not supported on this GPU v_log_clamp_f32 v1, 0.5 +// NOGFX8PLUS: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// SICI: v_log_clamp_f32_e32 v1, 0.5 ; encoding: [0xf0,0x4c,0x02,0x7e] -// SICI: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], 0.5 ; encoding: [0xf0,0x64,0x00,0x7e] -v_fract_f64 v[0:1], 0.5 - -// SICI: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x38,0x00,0x7e] v_trunc_f32 v0, 0.5 +// SICI: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x38,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e] +// GFX11: v_trunc_f32_e32 v0, 0.5 ; encoding: [0xf0,0x42,0x00,0x7e] -// SICI: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x64,0x00,0x7e] v_fract_f64 v[0:1], -1.0 +// SICI: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x64,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e] +// GFX11: v_fract_f64_e32 v[0:1], -1.0 ; encoding: [0xf3,0x7c,0x00,0x7e] -// SICI: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x38,0x00,0x7e] v_trunc_f32 v0, -1.0 +// SICI: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x38,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e] +// GFX11: v_trunc_f32_e32 v0, -1.0 ; encoding: [0xf3,0x42,0x00,0x7e] -// SICI: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x64,0x00,0x7e] v_fract_f64 v[0:1], 4.0 +// SICI: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x64,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e] +// GFX11: v_fract_f64_e32 v[0:1], 4.0 ; encoding: [0xf6,0x7c,0x00,0x7e] -// SICI: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x38,0x00,0x7e] v_trunc_f32 v0, 4.0 +// SICI: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x38,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e] +// GFX11: v_trunc_f32_e32 v0, 4.0 ; encoding: [0xf6,0x42,0x00,0x7e] -// SICI: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x64,0x00,0x7e] v_fract_f64 v[0:1], 0.0 +// SICI: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x64,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e] +// GFX11: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e] -// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e] v_trunc_f32 v0, 0.0 +// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] +// GFX11: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] -// SICI: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f] -// GFX89: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf8,0x3f] v_fract_f64 v[0:1], 1.5 +// SICI: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f] +// GFX89: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf8,0x3f] +// GFX12XX: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f] +// GFX11: v_fract_f64_e32 v[0:1], 0x3ff80000 ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf8,0x3f] -// SICI: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f] -// GFX89: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x00,0xc0,0x3f] v_trunc_f32 v0, 1.5 +// SICI: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f] +// GFX89: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x00,0xc0,0x3f] +// GFX12XX: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f] +// GFX11: v_trunc_f32_e32 v0, 0x3fc00000 ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0xc0,0x3f] -// SICI: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0] -// GFX89: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x64,0x00,0x7e,0xca,0x21,0x09,0xc0] v_fract_f64 v[0:1], -3.1415 +// SICI: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0] +// GFX89: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x64,0x00,0x7e,0xca,0x21,0x09,0xc0] +// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// GFX11: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0] +// GFX12: v_fract_f64_e32 v[0:1], 0xc00921ca ; encoding: [0xff,0x7c,0x00,0x7e,0xca,0x21,0x09,0xc0] +// GFX1250: v_fract_f64_e32 v[0:1], 0xc00921cac083126f ; encoding: [0xfe,0x7c,0x00,0x7e,0x6f,0x12,0x83,0xc0,0xca,0x21,0x09,0xc0] +// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// SICI: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0] -// GFX89: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x38,0x00,0x7e,0x56,0x0e,0x49,0xc0] v_trunc_f32 v0, -3.1415 +// SICI: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0] +// GFX89: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x38,0x00,0x7e,0x56,0x0e,0x49,0xc0] +// GFX12XX: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0] +// GFX11: v_trunc_f32_e32 v0, 0xc0490e56 ; encoding: [0xff,0x42,0x00,0x7e,0x56,0x0e,0x49,0xc0] -// SICI: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44] -// GFX89: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x64,0x00,0x7e,0x02,0x2d,0xb5,0x44] v_fract_f64 v[0:1], 100000000000000000000000.0 +// SICI: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44] +// GFX89: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x64,0x00,0x7e,0x02,0x2d,0xb5,0x44] +// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// GFX11: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44] +// GFX12: v_fract_f64_e32 v[0:1], 0x44b52d02 ; encoding: [0xff,0x7c,0x00,0x7e,0x02,0x2d,0xb5,0x44] +// GFX1250: v_fract_f64_e32 v[0:1], 0x44b52d02c7e14af6 ; encoding: [0xfe,0x7c,0x00,0x7e,0xf6,0x4a,0xe1,0xc7,0x02,0x2d,0xb5,0x44] +// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// SICI: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65] -// GFX89: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x38,0x00,0x7e,0x16,0x68,0xa9,0x65] v_trunc_f32 v0, 100000000000000000000000.0 +// SICI: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65] +// GFX89: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x38,0x00,0x7e,0x16,0x68,0xa9,0x65] +// GFX12XX: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65] +// GFX11: v_trunc_f32_e32 v0, 0x65a96816 ; encoding: [0xff,0x42,0x00,0x7e,0x16,0x68,0xa9,0x65] -// SICI: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41] -// GFX89: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x64,0x00,0x7e,0xd0,0x12,0x63,0x41] v_fract_f64 v[0:1], 10000000.0 +// SICI: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41] +// GFX89: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x64,0x00,0x7e,0xd0,0x12,0x63,0x41] +// GFX12XX: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41] +// GFX11: v_fract_f64_e32 v[0:1], 0x416312d0 ; encoding: [0xff,0x7c,0x00,0x7e,0xd0,0x12,0x63,0x41] -// SICI: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b] -// GFX89: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x38,0x00,0x7e,0x80,0x96,0x18,0x4b] v_trunc_f32 v0, 10000000.0 +// SICI: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b] +// GFX89: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x38,0x00,0x7e,0x80,0x96,0x18,0x4b] +// GFX12XX: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b] +// GFX11: v_trunc_f32_e32 v0, 0x4b189680 ; encoding: [0xff,0x42,0x00,0x7e,0x80,0x96,0x18,0x4b] -// SICI: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47] -// GFX89: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xef,0x47] v_fract_f64 v[0:1], 3.402823e+38 +// SICI: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47] +// GFX89: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xef,0x47] +// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// GFX11: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47] +// GFX12: v_fract_f64_e32 v[0:1], 0x47efffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xef,0x47] +// GFX1250: v_fract_f64_e32 v[0:1], 0x47efffff966ad924 ; encoding: [0xfe,0x7c,0x00,0x7e,0x24,0xd9,0x6a,0x96,0xff,0xff,0xef,0x47] +// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// SICI: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f] -// GFX89: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x38,0x00,0x7e,0xfd,0xff,0x7f,0x7f] v_trunc_f32 v0, 3.402823e+38 +// SICI: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f] +// GFX89: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x38,0x00,0x7e,0xfd,0xff,0x7f,0x7f] +// GFX12XX: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f] +// GFX11: v_trunc_f32_e32 v0, 0x7f7ffffd ; encoding: [0xff,0x42,0x00,0x7e,0xfd,0xff,0x7f,0x7f] -// SICI: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38] -// GFX89: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0x1f,0x38] v_fract_f64 v[0:1], 2.3509886e-38 +// SICI: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38] +// GFX89: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0x1f,0x38] +// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// GFX11: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38] +// GFX12: v_fract_f64_e32 v[0:1], 0x381fffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0x1f,0x38] +// GFX1250: v_fract_f64_e32 v[0:1], 0x381fffffe8c9d9fb ; encoding: [0xfe,0x7c,0x00,0x7e,0xfb,0xd9,0xc9,0xe8,0xff,0xff,0x1f,0x38] +// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// SICI: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00] -// GFX89: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x38,0x00,0x7e,0xff,0xff,0xff,0x00] v_trunc_f32 v0, 2.3509886e-38 +// SICI: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00] +// GFX89: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x38,0x00,0x7e,0xff,0xff,0xff,0x00] +// GFX12XX: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00] +// GFX11: v_trunc_f32_e32 v0, 0xffffff ; encoding: [0xff,0x42,0x00,0x7e,0xff,0xff,0xff,0x00] -// SICI: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31] -// GFX89: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x64,0x00,0x7e,0x23,0xf6,0x79,0x31] v_fract_f64 v[0:1], 2.3509886e-70 +// SICI: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31] +// GFX89: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x64,0x00,0x7e,0x23,0xf6,0x79,0x31] +// NOSICI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX89: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// GFX11: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31] +// GFX12: v_fract_f64_e32 v[0:1], 0x3179f623 ; encoding: [0xff,0x7c,0x00,0x7e,0x23,0xf6,0x79,0x31] +// GFX1250: v_fract_f64_e32 v[0:1], 0x3179f623c2d3cf3c ; encoding: [0xfe,0x7c,0x00,0x7e,0x3c,0xcf,0xd3,0xc2,0x23,0xf6,0x79,0x31] +// NOGFX11: :[[@LINE-8]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOGFX12: :[[@LINE-9]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_trunc_f32 v0, 2.3509886e-70 +// NOGCN: :[[@LINE-1]]:17: error: invalid operand for instruction + +v_fract_f64_e32 v[0:1], 1.0 +// SICI: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x64,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x7c,0x00,0x7e] +// GFX11: v_fract_f64_e32 v[0:1], 1.0 ; encoding: [0xf2,0x7c,0x00,0x7e] + +v_fract_f64_e32 v[0:1], lit(1.0) +// SICI: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f] +// GFX89: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf0,0x3f] +// GFX11: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f] +// GFX12: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f] +// GFX1250: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xfe,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f,0x00,0x00,0x00,0x00] + +v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1.0 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1.0 ; encoding: [0x08,0x40,0x44,0xcc,0x00,0x09,0xca,0x1b] +// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode + +v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], lit(1.0) +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-3]]:54: error: invalid operand for instruction +// NOGFX12: :[[@LINE-4]]:54: error: invalid operand for instruction +// NOGFX1250: :[[@LINE-5]]:54: error: invalid operand for instruction + +v_cos_f16_e32 v5.l, 1.0 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode +// GFX11: v_cos_f16_e32 v5.l, 1.0 ; encoding: [0xf2,0xc2,0x0a,0x7e] +// GFX1250: v_cos_f16_e32 v5.l, 1.0 ; encoding: [0xf2,0xc2,0x0a,0x7e] +// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode + +v_cos_f16_e32 v5.l, lit(1.0) +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode +// GFX11: v_cos_f16_e32 v5.l, lit(0x3c00) ; encoding: [0xff,0xc2,0x0a,0x7e,0x00,0x3c,0x00,0x00] +// GFX1250: v_cos_f16_e32 v5.l, lit(0x3c00) ; encoding: [0xff,0xc2,0x0a,0x7e,0x00,0x3c,0x00,0x00] +// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode + +v_tanh_bf16 v5, 1.0 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250: v_tanh_bf16_e32 v5, 1.0 ; encoding: [0xf2,0x94,0x0a,0x7e] +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_tanh_bf16 v5, lit(1.0) +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250: v_tanh_bf16_e32 v5, lit(0x3f80) ; encoding: [0xff,0x94,0x0a,0x7e,0x80,0x3f,0x00,0x00] +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_trunc_f32_e32 v0, 1.0 +// SICI: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x38,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x42,0x00,0x7e] +// GFX11: v_trunc_f32_e32 v0, 1.0 ; encoding: [0xf2,0x42,0x00,0x7e] + +v_trunc_f32_e32 v0, lit(1.0) +// SICI: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0x80,0x3f] +// GFX89: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x38,0x00,0x7e,0x00,0x00,0x80,0x3f] +// GFX12XX: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0x80,0x3f] +// GFX11: v_trunc_f32_e32 v0, lit(0x3f800000) ; encoding: [0xff,0x42,0x00,0x7e,0x00,0x00,0x80,0x3f] + +v_dot2_bf16_bf16 v5.l, v1, v2, 1.0 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, 1.0 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xca,0x03] +// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_dot2_bf16_bf16 v5.l, v1, v2, lit(1.0) +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, lit(0x3f80) ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0x80,0x3f,0x00,0x00] +// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_dot2_f32_f16 v5, v1, 1.0, v2 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_dot2_f32_f16 v5, v1, 1.0, v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xe5,0x09,0x1c] +// GFX12: v_dot2_f32_f16 v5, v1, 1.0, v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xe5,0x09,0x1c] +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_dot2_f32_f16 v5, v1, lit(1.0), v2 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_dot2_f32_f16 v5, v1, lit(0x3c00), v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x00,0x3c,0x00,0x00] +// GFX12: v_dot2_f32_f16 v5, v1, lit(0x3c00), v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x00,0x3c,0x00,0x00] +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_cvt_pk_fp8_f16 v1.l, 1.0 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250: v_cvt_pk_fp8_f16 v1.l, 0x3c00 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x3c,0x00,0x00] +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_cvt_pk_fp8_f16 v1.l, lit(1.0) +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250: v_cvt_pk_fp8_f16 v1.l, lit(0x3c00) ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x3c,0x00,0x00] +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU //---------------------------------------------------------------------------// // fp literal, expected int operand //---------------------------------------------------------------------------// -// SICI: s_mov_b64 s[0:1], 0.5 ; encoding: [0xf0,0x04,0x80,0xbe] -// GFX89: s_mov_b64 s[0:1], 0.5 ; encoding: [0xf0,0x01,0x80,0xbe] s_mov_b64_e32 s[0:1], 0.5 +// GFX8PLUS: s_mov_b64 s[0:1], 0.5 ; encoding: [0xf0,0x01,0x80,0xbe] +// SICI: s_mov_b64 s[0:1], 0.5 ; encoding: [0xf0,0x04,0x80,0xbe] + +s_mov_b64 s[0:1], lit(0.5) +// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction -// SICI: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x26] v_and_b32_e32 v0, 0.5, v1 +// SICI: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x26] +// GFX12XX: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36] +// GFX11: v_and_b32_e32 v0, 0.5, v1 ; encoding: [0xf0,0x02,0x00,0x36] -// SICI: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf0,0x02,0x02,0x00] -// GFX89: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf0,0x02,0x02,0x00] v_and_b32_e64 v0, 0.5, v1 +// SICI: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf0,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf0,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf0,0x02,0x02,0x00] +// GFX11: v_and_b32_e64 v0, 0.5, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf0,0x02,0x02,0x00] -// SICI: s_mov_b64 s[0:1], -1.0 ; encoding: [0xf3,0x04,0x80,0xbe] -// GFX89: s_mov_b64 s[0:1], -1.0 ; encoding: [0xf3,0x01,0x80,0xbe] s_mov_b64_e32 s[0:1], -1.0 +// GFX8PLUS: s_mov_b64 s[0:1], -1.0 ; encoding: [0xf3,0x01,0x80,0xbe] +// SICI: s_mov_b64 s[0:1], -1.0 ; encoding: [0xf3,0x04,0x80,0xbe] -// SICI: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x26] v_and_b32_e32 v0, -1.0, v1 +// SICI: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x26] +// GFX12XX: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36] +// GFX11: v_and_b32_e32 v0, -1.0, v1 ; encoding: [0xf3,0x02,0x00,0x36] -// SICI: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf3,0x02,0x02,0x00] -// GFX89: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf3,0x02,0x02,0x00] v_and_b32_e64 v0, -1.0, v1 +// SICI: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf3,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf3,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf3,0x02,0x02,0x00] +// GFX11: v_and_b32_e64 v0, -1.0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf3,0x02,0x02,0x00] -// SICI: s_mov_b64 s[0:1], 4.0 ; encoding: [0xf6,0x04,0x80,0xbe] -// GFX89: s_mov_b64 s[0:1], 4.0 ; encoding: [0xf6,0x01,0x80,0xbe] s_mov_b64_e32 s[0:1], 4.0 +// GFX8PLUS: s_mov_b64 s[0:1], 4.0 ; encoding: [0xf6,0x01,0x80,0xbe] +// SICI: s_mov_b64 s[0:1], 4.0 ; encoding: [0xf6,0x04,0x80,0xbe] -// SICI: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x26] v_and_b32_e32 v0, 4.0, v1 +// SICI: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x26] +// GFX12XX: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36] +// GFX11: v_and_b32_e32 v0, 4.0, v1 ; encoding: [0xf6,0x02,0x00,0x36] -// SICI: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf6,0x02,0x02,0x00] -// GFX89: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf6,0x02,0x02,0x00] v_and_b32_e64 v0, 4.0, v1 +// SICI: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xf6,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf6,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf6,0x02,0x02,0x00] +// GFX11: v_and_b32_e64 v0, 4.0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf6,0x02,0x02,0x00] -// SICI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x04,0x80,0xbe] -// GFX89: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x01,0x80,0xbe] s_mov_b64_e32 s[0:1], 0.0 +// GFX8PLUS: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x01,0x80,0xbe] +// SICI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x04,0x80,0xbe] -// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26] v_and_b32_e32 v0, 0.0, v1 +// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26] +// GFX12XX: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] +// GFX11: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] -// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00] -// GFX89: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00] v_and_b32_e64 v0, 0.0, v1 +// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00] +// GFX11: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b64_e32 s[0:1], 1.5 +// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction -// SICI: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f] -// GFX89: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x26,0x00,0x00,0xc0,0x3f] v_and_b32_e32 v0, 1.5, v1 +// SICI: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f] +// GFX89: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x26,0x00,0x00,0xc0,0x3f] +// GFX12XX: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f] +// GFX11: v_and_b32_e32 v0, 0x3fc00000, v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0xc0,0x3f] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b64_e32 s[0:1], -3.1415 +// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction -// SICI: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0] -// GFX89: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x26,0x56,0x0e,0x49,0xc0] v_and_b32_e32 v0, -3.1415, v1 +// SICI: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0] +// GFX89: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x26,0x56,0x0e,0x49,0xc0] +// GFX12XX: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0] +// GFX11: v_and_b32_e32 v0, 0xc0490e56, v1 ; encoding: [0xff,0x02,0x00,0x36,0x56,0x0e,0x49,0xc0] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b64_e32 s[0:1], 100000000000000000000000.0 +// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction -// SICI: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65] -// GFX89: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x26,0x16,0x68,0xa9,0x65] v_and_b32_e32 v0, 100000000000000000000000.0, v1 +// SICI: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65] +// GFX89: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x26,0x16,0x68,0xa9,0x65] +// GFX12XX: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65] +// GFX11: v_and_b32_e32 v0, 0x65a96816, v1 ; encoding: [0xff,0x02,0x00,0x36,0x16,0x68,0xa9,0x65] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b64_e32 s[0:1], 10000000.0 +// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction -// SICI: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b] -// GFX89: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x26,0x80,0x96,0x18,0x4b] v_and_b32_e32 v0, 10000000.0, v1 +// SICI: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b] +// GFX89: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x26,0x80,0x96,0x18,0x4b] +// GFX12XX: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b] +// GFX11: v_and_b32_e32 v0, 0x4b189680, v1 ; encoding: [0xff,0x02,0x00,0x36,0x80,0x96,0x18,0x4b] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b64_e32 s[0:1], 3.402823e+38 +// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction -// SICI: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f] -// GFX89: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x26,0xfd,0xff,0x7f,0x7f] v_and_b32_e32 v0, 3.402823e+38, v1 +// SICI: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f] +// GFX89: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x26,0xfd,0xff,0x7f,0x7f] +// GFX12XX: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f] +// GFX11: v_and_b32_e32 v0, 0x7f7ffffd, v1 ; encoding: [0xff,0x02,0x00,0x36,0xfd,0xff,0x7f,0x7f] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b64_e32 s[0:1], 2.3509886e-38 +// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction -// SICI: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00] -// GFX89: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x26,0xff,0xff,0xff,0x00] v_and_b32_e32 v0, 2.3509886e-38, v1 +// SICI: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00] +// GFX89: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x26,0xff,0xff,0xff,0x00] +// GFX12XX: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00] +// GFX11: v_and_b32_e32 v0, 0xffffff, v1 ; encoding: [0xff,0x02,0x00,0x36,0xff,0xff,0xff,0x00] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b64_e32 s[0:1], 2.3509886e-70 +// NOGCN: :[[@LINE-1]]:23: error: invalid operand for instruction -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_and_b32_e32 v0, 2.3509886e-70, v1 +// NOGCN: :[[@LINE-1]]:19: error: invalid operand for instruction + +v_not_b16 v5.l, 1.0 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_not_b16_e32 v5.l, 1.0 ; encoding: [0xf2,0xd2,0x0a,0x7e] +// GFX1250: v_not_b16_e32 v5.l, 1.0 ; encoding: [0xf2,0xd2,0x0a,0x7e] +// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode + +v_not_b16 v5.l, lit(1.0) +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_not_b16_e32 v5.l, lit(0x3f800000) ; encoding: [0xff,0xd2,0x0a,0x7e,0x00,0x00,0x80,0x3f] +// GFX1250: v_not_b16_e32 v5.l, lit(0x3f800000) ; encoding: [0xff,0xd2,0x0a,0x7e,0x00,0x00,0x80,0x3f] +// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode + +v_and_b32_e32 v0, 1.0, v1 +// SICI: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x26] +// GFX12XX: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x36] +// GFX11: v_and_b32_e32 v0, 1.0, v1 ; encoding: [0xf2,0x02,0x00,0x36] + +v_and_b32_e32 v0, lit(1.0), v1 +// SICI: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0x80,0x3f] +// GFX89: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x26,0x00,0x00,0x80,0x3f] +// GFX12XX: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0x80,0x3f] +// GFX11: v_and_b32_e32 v0, lit(0x3f800000), v1 ; encoding: [0xff,0x02,0x00,0x36,0x00,0x00,0x80,0x3f] + +v_pk_add_u16 v5, exec_lo, 1.0 +// GFX12XX: v_pk_add_u16 v5, exec_lo, 1.0 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xe4,0x01,0x18] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX9: v_pk_add_u16 v5, exec_lo, 1.0 ; encoding: [0x05,0x40,0x8a,0xd3,0x7e,0xe4,0x01,0x18] +// GFX11: v_pk_add_u16 v5, exec_lo, 1.0 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xe4,0x01,0x18] +// NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_pk_add_u16 v5, exec_lo, lit(1.0) +// GFX12XX: v_pk_add_u16 v5, exec_lo, lit(0x3f800000) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x00,0x00,0x80,0x3f] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_pk_add_u16 v5, exec_lo, lit(0x3f800000) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x00,0x00,0x80,0x3f] +// NOVI: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX9: :[[@LINE-5]]:31: error: invalid operand (violates constant bus restrictions) + +v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1.0 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1.0 ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0xca,0x03] +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(1.0) +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(0x3f800000) ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0xfe,0x03,0x00,0x00,0x80,0x3f] +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU //---------------------------------------------------------------------------// // int literal, expected fp operand //---------------------------------------------------------------------------// -// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e] v_trunc_f32_e32 v0, 0 +// SICI: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x38,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] +// GFX11: v_trunc_f32_e32 v0, 0 ; encoding: [0x80,0x42,0x00,0x7e] + +v_fract_f64_e32 v[0:1], 1 +// SICI: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x64,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x7c,0x00,0x7e] +// GFX11: v_fract_f64_e32 v[0:1], 1 ; encoding: [0x81,0x7c,0x00,0x7e] + +v_fract_f64_e32 v[0:1], lit(1) +// SICI: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00] +// GFX89: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x64,0x00,0x7e,0x01,0x00,0x00,0x00] +// GFX11: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00] +// GFX12: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00] +// GFX1250: v_fract_f64_e32 v[0:1], lit(0x1) ; encoding: [0xfe,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00] -// SICI: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], 0 ; encoding: [0x80,0x64,0x00,0x7e] -v_fract_f64_e32 v[0:1], 0 - -// SICI: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x42,0xd3,0x80,0x00,0x00,0x00] -// GFX89: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x5c,0xd1,0x80,0x00,0x00,0x00] v_trunc_f32_e64 v0, 0 +// SICI: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x42,0xd3,0x80,0x00,0x00,0x00] +// GFX89: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0x5c,0xd1,0x80,0x00,0x00,0x00] +// GFX12XX: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0xa1,0xd5,0x80,0x00,0x00,0x00] +// GFX11: v_trunc_f32_e64 v0, 0 ; encoding: [0x00,0x00,0xa1,0xd5,0x80,0x00,0x00,0x00] -// SICI: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x7c,0xd3,0x80,0x00,0x00,0x00] -// GFX89: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x72,0xd1,0x80,0x00,0x00,0x00] v_fract_f64_e64 v[0:1], 0 +// SICI: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x7c,0xd3,0x80,0x00,0x00,0x00] +// GFX89: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0x72,0xd1,0x80,0x00,0x00,0x00] +// GFX12XX: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0xbe,0xd5,0x80,0x00,0x00,0x00] +// GFX11: v_fract_f64_e64 v[0:1], 0 ; encoding: [0x00,0x00,0xbe,0xd5,0x80,0x00,0x00,0x00] -// SICI: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x38,0x00,0x7e] v_trunc_f32_e32 v0, -13 +// SICI: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x38,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e] +// GFX11: v_trunc_f32_e32 v0, -13 ; encoding: [0xcd,0x42,0x00,0x7e] -// SICI: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x64,0x00,0x7e] v_fract_f64_e32 v[0:1], -13 +// SICI: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x64,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e] +// GFX11: v_fract_f64_e32 v[0:1], -13 ; encoding: [0xcd,0x7c,0x00,0x7e] -// SICI: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x42,0xd3,0xcd,0x00,0x00,0x00] -// GFX89: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x5c,0xd1,0xcd,0x00,0x00,0x00] v_trunc_f32_e64 v0, -13 +// SICI: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x42,0xd3,0xcd,0x00,0x00,0x00] +// GFX89: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0x5c,0xd1,0xcd,0x00,0x00,0x00] +// GFX12XX: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0xa1,0xd5,0xcd,0x00,0x00,0x00] +// GFX11: v_trunc_f32_e64 v0, -13 ; encoding: [0x00,0x00,0xa1,0xd5,0xcd,0x00,0x00,0x00] -// SICI: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x7c,0xd3,0xcd,0x00,0x00,0x00] -// GFX89: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x72,0xd1,0xcd,0x00,0x00,0x00] v_fract_f64_e64 v[0:1], -13 +// SICI: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x7c,0xd3,0xcd,0x00,0x00,0x00] +// GFX89: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0x72,0xd1,0xcd,0x00,0x00,0x00] +// GFX12XX: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0xbe,0xd5,0xcd,0x00,0x00,0x00] +// GFX11: v_fract_f64_e64 v[0:1], -13 ; encoding: [0x00,0x00,0xbe,0xd5,0xcd,0x00,0x00,0x00] -// SICI: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x38,0x00,0x7e] v_trunc_f32_e32 v0, 35 +// SICI: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x38,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e] +// GFX11: v_trunc_f32_e32 v0, 35 ; encoding: [0xa3,0x42,0x00,0x7e] -// SICI: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x64,0x00,0x7e] v_fract_f64_e32 v[0:1], 35 +// SICI: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x64,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x7c,0x00,0x7e] +// GFX11: v_fract_f64_e32 v[0:1], 35 ; encoding: [0xa3,0x7c,0x00,0x7e] -// SICI: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x42,0xd3,0xa3,0x00,0x00,0x00] -// GFX89: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x5c,0xd1,0xa3,0x00,0x00,0x00] v_trunc_f32_e64 v0, 35 +// SICI: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x42,0xd3,0xa3,0x00,0x00,0x00] +// GFX89: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0x5c,0xd1,0xa3,0x00,0x00,0x00] +// GFX12XX: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0xa1,0xd5,0xa3,0x00,0x00,0x00] +// GFX11: v_trunc_f32_e64 v0, 35 ; encoding: [0x00,0x00,0xa1,0xd5,0xa3,0x00,0x00,0x00] -// SICI: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x7c,0xd3,0xa3,0x00,0x00,0x00] -// GFX89: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x72,0xd1,0xa3,0x00,0x00,0x00] v_fract_f64_e64 v[0:1], 35 +// SICI: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x7c,0xd3,0xa3,0x00,0x00,0x00] +// GFX89: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0x72,0xd1,0xa3,0x00,0x00,0x00] +// GFX12XX: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0xbe,0xd5,0xa3,0x00,0x00,0x00] +// GFX11: v_fract_f64_e64 v[0:1], 35 ; encoding: [0x00,0x00,0xbe,0xd5,0xa3,0x00,0x00,0x00] -// SICI: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00] -// GFX89: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x38,0x00,0x7e,0xd2,0x04,0x00,0x00] v_trunc_f32_e32 v0, 1234 +// SICI: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00] +// GFX89: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x38,0x00,0x7e,0xd2,0x04,0x00,0x00] +// GFX12XX: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00] +// GFX11: v_trunc_f32_e32 v0, 0x4d2 ; encoding: [0xff,0x42,0x00,0x7e,0xd2,0x04,0x00,0x00] -// SICI: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00] -// GFX89: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x64,0x00,0x7e,0xd2,0x04,0x00,0x00] v_fract_f64_e32 v[0:1], 1234 +// SICI: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00] +// GFX89: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x64,0x00,0x7e,0xd2,0x04,0x00,0x00] +// GFX12XX: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00] +// GFX11: v_fract_f64_e32 v[0:1], 0x4d2 ; encoding: [0xff,0x7c,0x00,0x7e,0xd2,0x04,0x00,0x00] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported v_trunc_f32_e64 v0, 1234 +// GFX12XX: v_trunc_f32_e64 v0, 0x4d2 ; encoding: [0x00,0x00,0xa1,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00] +// NOSICI: :[[@LINE-2]]:21: error: literal operands are not supported +// NOGFX89: :[[@LINE-3]]:21: error: literal operands are not supported +// GFX11: v_trunc_f32_e64 v0, 0x4d2 ; encoding: [0x00,0x00,0xa1,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00] +// NOSICIVI: :[[@LINE-1]]:21: error: literal operands are not supported -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported v_fract_f64_e64 v[0:1], 1234 +// GFX12XX: v_fract_f64_e64 v[0:1], 0x4d2 ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00] +// NOSICI: :[[@LINE-2]]:25: error: literal operands are not supported +// NOGFX89: :[[@LINE-3]]:25: error: literal operands are not supported +// GFX11: v_fract_f64_e64 v[0:1], 0x4d2 ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0xd2,0x04,0x00,0x00] +// NOSICIVI: :[[@LINE-1]]:25: error: literal operands are not supported -// SICI: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff] -// GFX89: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x38,0x00,0x7e,0xcf,0x2b,0xff,0xff] v_trunc_f32_e32 v0, -54321 +// SICI: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff] +// GFX89: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x38,0x00,0x7e,0xcf,0x2b,0xff,0xff] +// GFX12XX: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff] +// GFX11: v_trunc_f32_e32 v0, 0xffff2bcf ; encoding: [0xff,0x42,0x00,0x7e,0xcf,0x2b,0xff,0xff] -// SICI: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff] -// GFX89: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x64,0x00,0x7e,0xcf,0x2b,0xff,0xff] v_fract_f64_e32 v[0:1], -54321 +// SICI: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff] +// GFX89: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x64,0x00,0x7e,0xcf,0x2b,0xff,0xff] +// GFX12XX: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff] +// GFX11: v_fract_f64_e32 v[0:1], 0xffff2bcf ; encoding: [0xff,0x7c,0x00,0x7e,0xcf,0x2b,0xff,0xff] -// SICI: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde] -// GFX89: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x38,0x00,0x7e,0xef,0xbe,0xad,0xde] v_trunc_f32_e32 v0, 0xdeadbeef +// SICI: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde] +// GFX89: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x38,0x00,0x7e,0xef,0xbe,0xad,0xde] +// GFX12XX: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde] +// GFX11: v_trunc_f32_e32 v0, 0xdeadbeef ; encoding: [0xff,0x42,0x00,0x7e,0xef,0xbe,0xad,0xde] -// SICI: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde] -// GFX89: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x64,0x00,0x7e,0xef,0xbe,0xad,0xde] v_fract_f64_e32 v[0:1], 0xdeadbeef +// SICI: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde] +// GFX89: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x64,0x00,0x7e,0xef,0xbe,0xad,0xde] +// GFX12XX: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde] +// GFX11: v_fract_f64_e32 v[0:1], 0xdeadbeef ; encoding: [0xff,0x7c,0x00,0x7e,0xef,0xbe,0xad,0xde] -// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e] v_trunc_f32_e32 v0, 0xffffffff +// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] +// GFX11: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] -// SICI: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff] -// GFX89: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xff,0xff] v_fract_f64_e32 v[0:1], 0xffffffff +// SICI: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff] +// GFX89: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x64,0x00,0x7e,0xff,0xff,0xff,0xff] +// GFX12XX: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff] +// GFX11: v_fract_f64_e32 v[0:1], 0xffffffff ; encoding: [0xff,0x7c,0x00,0x7e,0xff,0xff,0xff,0xff] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_trunc_f32_e32 v0, 0x123456789abcdef0 +// NOGCN: :[[@LINE-1]]:21: error: invalid operand for instruction -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_fract_f64_e32 v[0:1], 0x123456789abcdef0 +// NOSICI: :[[@LINE-1]]:25: error: invalid operand for instruction +// NOGFX89: :[[@LINE-2]]:25: error: invalid operand for instruction +// GFX1250: v_fract_f64_e32 v[0:1], 0x123456789abcdef0 ; encoding: [0xfe,0x7c,0x00,0x7e,0xf0,0xde,0xbc,0x9a,0x78,0x56,0x34,0x12] +// NOGFX11: :[[@LINE-4]]:25: error: invalid operand for instruction +// NOGFX12: :[[@LINE-5]]:25: error: invalid operand for instruction +// NOSICIVI: :[[@LINE-1]]:25: error: invalid operand for instruction -// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] -// GFX89: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e] v_trunc_f32_e32 v0, 0xffffffffffffffff +// SICI: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x38,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] +// GFX11: v_trunc_f32_e32 v0, -1 ; encoding: [0xc1,0x42,0x00,0x7e] -// SICI: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e] -// GFX89: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x64,0x00,0x7e] v_fract_f64_e32 v[0:1], 0xffffffffffffffff +// SICI: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e] +// GFX89: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x64,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e] +// GFX11: v_fract_f64_e32 v[0:1], -1 ; encoding: [0xc1,0x7c,0x00,0x7e] + +v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], 1 ; encoding: [0x08,0x40,0x44,0xcc,0x00,0x09,0x06,0x1a] +// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode + +v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], lit(1) +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-3]]:54: error: invalid operand for instruction +// NOGFX12: :[[@LINE-4]]:54: error: invalid operand for instruction +// NOGFX1250: :[[@LINE-5]]:54: error: invalid operand for instruction + +v_cos_f16_e32 v5.l, 1 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode +// GFX11: v_cos_f16_e32 v5.l, 1 ; encoding: [0x81,0xc2,0x0a,0x7e] +// GFX1250: v_cos_f16_e32 v5.l, 1 ; encoding: [0x81,0xc2,0x0a,0x7e] +// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode + +v_cos_f16_e32 v5.l, lit(1) +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode +// GFX11: v_cos_f16_e32 v5.l, lit(0x1) ; encoding: [0xff,0xc2,0x0a,0x7e,0x01,0x00,0x00,0x00] +// GFX1250: v_cos_f16_e32 v5.l, lit(0x1) ; encoding: [0xff,0xc2,0x0a,0x7e,0x01,0x00,0x00,0x00] +// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode + +v_tanh_bf16 v5, 1 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250: v_tanh_bf16_e32 v5, 1 ; encoding: [0x81,0x94,0x0a,0x7e] +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_tanh_bf16 v5, lit(1) +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250: v_tanh_bf16_e32 v5, lit(0x1) ; encoding: [0xff,0x94,0x0a,0x7e,0x01,0x00,0x00,0x00] +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_trunc_f32_e32 v0, 1 +// SICI: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x42,0x00,0x7e] +// GFX89: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x38,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x42,0x00,0x7e] +// GFX11: v_trunc_f32_e32 v0, 1 ; encoding: [0x81,0x42,0x00,0x7e] + +v_trunc_f32_e32 v0, lit(1) +// SICI: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x42,0x00,0x7e,0x01,0x00,0x00,0x00] +// GFX89: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x38,0x00,0x7e,0x01,0x00,0x00,0x00] +// GFX12XX: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x42,0x00,0x7e,0x01,0x00,0x00,0x00] +// GFX11: v_trunc_f32_e32 v0, lit(0x1) ; encoding: [0xff,0x42,0x00,0x7e,0x01,0x00,0x00,0x00] + +v_dot2_bf16_bf16 v5.l, v1, v2, 1 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, 1 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x06,0x02] +// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_dot2_bf16_bf16 v5.l, v1, v2, lit(1) +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_dot2_bf16_bf16 v5.l, v1, v2, lit(0x1) ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0xfe,0x03,0x01,0x00,0x00,0x00] +// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_dot2_f32_f16 v5, v1, 1, v2 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_dot2_f32_f16 v5, v1, 1, v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0x03,0x09,0x1c] +// GFX12: v_dot2_f32_f16 v5, v1, 1, v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0x03,0x09,0x1c] +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_dot2_f32_f16 v5, v1, lit(1), v2 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_dot2_f32_f16 v5, v1, lit(0x1), v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x01,0x00,0x00,0x00] +// GFX12: v_dot2_f32_f16 v5, v1, lit(0x1), v2 ; encoding: [0x05,0x40,0x13,0xcc,0x01,0xff,0x09,0x1c,0x01,0x00,0x00,0x00] +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_cvt_pk_fp8_f16 v1.l, 1 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250: v_cvt_pk_fp8_f16 v1.l, 1 ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00] +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_cvt_pk_fp8_f16 v1.l, lit(1) +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250: v_cvt_pk_fp8_f16 v1.l, lit(0x1) ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00] +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU //---------------------------------------------------------------------------// // int literal, expected int operand //---------------------------------------------------------------------------// -// SICI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x04,0x80,0xbe] -// GFX89: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x01,0x80,0xbe] s_mov_b64_e32 s[0:1], 0 +// GFX8PLUS: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x01,0x80,0xbe] +// SICI: s_mov_b64 s[0:1], 0 ; encoding: [0x80,0x04,0x80,0xbe] -// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26] v_and_b32_e32 v0, 0, v1 +// SICI: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x26] +// GFX12XX: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] +// GFX11: v_and_b32_e32 v0, 0, v1 ; encoding: [0x80,0x02,0x00,0x36] -// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00] -// GFX89: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00] v_and_b32_e64 v0, 0, v1 +// SICI: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x36,0xd2,0x80,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x13,0xd1,0x80,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00] +// GFX11: v_and_b32_e64 v0, 0, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0x80,0x02,0x02,0x00] -// SICI: s_mov_b64 s[0:1], -13 ; encoding: [0xcd,0x04,0x80,0xbe] -// GFX89: s_mov_b64 s[0:1], -13 ; encoding: [0xcd,0x01,0x80,0xbe] s_mov_b64_e32 s[0:1], -13 +// GFX8PLUS: s_mov_b64 s[0:1], -13 ; encoding: [0xcd,0x01,0x80,0xbe] +// SICI: s_mov_b64 s[0:1], -13 ; encoding: [0xcd,0x04,0x80,0xbe] -// SICI: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x26] v_and_b32_e32 v0, -13, v1 +// SICI: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x26] +// GFX12XX: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36] +// GFX11: v_and_b32_e32 v0, -13, v1 ; encoding: [0xcd,0x02,0x00,0x36] -// SICI: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xcd,0x02,0x02,0x00] -// GFX89: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xcd,0x02,0x02,0x00] v_and_b32_e64 v0, -13, v1 +// SICI: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xcd,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xcd,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xcd,0x02,0x02,0x00] +// GFX11: v_and_b32_e64 v0, -13, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xcd,0x02,0x02,0x00] -// SICI: s_mov_b64 s[0:1], 35 ; encoding: [0xa3,0x04,0x80,0xbe] -// GFX89: s_mov_b64 s[0:1], 35 ; encoding: [0xa3,0x01,0x80,0xbe] s_mov_b64_e32 s[0:1], 35 +// GFX8PLUS: s_mov_b64 s[0:1], 35 ; encoding: [0xa3,0x01,0x80,0xbe] +// SICI: s_mov_b64 s[0:1], 35 ; encoding: [0xa3,0x04,0x80,0xbe] -// SICI: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x26] v_and_b32_e32 v0, 35, v1 +// SICI: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x26] +// GFX12XX: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36] +// GFX11: v_and_b32_e32 v0, 35, v1 ; encoding: [0xa3,0x02,0x00,0x36] -// SICI: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xa3,0x02,0x02,0x00] -// GFX89: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xa3,0x02,0x02,0x00] v_and_b32_e64 v0, 35, v1 +// SICI: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x36,0xd2,0xa3,0x02,0x02,0x00] +// GFX89: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xa3,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xa3,0x02,0x02,0x00] +// GFX11: v_and_b32_e64 v0, 35, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xa3,0x02,0x02,0x00] -// SICI: s_mov_b64 s[0:1], 0x4d2 ; encoding: [0xff,0x04,0x80,0xbe,0xd2,0x04,0x00,0x00] -// GFX89: s_mov_b64 s[0:1], 0x4d2 ; encoding: [0xff,0x01,0x80,0xbe,0xd2,0x04,0x00,0x00] s_mov_b64_e32 s[0:1], 1234 +// GFX8PLUS: s_mov_b64 s[0:1], 0x4d2 ; encoding: [0xff,0x01,0x80,0xbe,0xd2,0x04,0x00,0x00] +// SICI: s_mov_b64 s[0:1], 0x4d2 ; encoding: [0xff,0x04,0x80,0xbe,0xd2,0x04,0x00,0x00] -// SICI: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00] -// GFX89: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x26,0xd2,0x04,0x00,0x00] v_and_b32_e32 v0, 1234, v1 +// SICI: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00] +// GFX89: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x26,0xd2,0x04,0x00,0x00] +// GFX12XX: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00] +// GFX11: v_and_b32_e32 v0, 0x4d2, v1 ; encoding: [0xff,0x02,0x00,0x36,0xd2,0x04,0x00,0x00] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported v_and_b32_e64 v0, 1234, v1 +// GFX12XX: v_and_b32_e64 v0, 0x4d2, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xff,0x02,0x02,0x00,0xd2,0x04,0x00,0x00] +// NOSICI: :[[@LINE-2]]:19: error: literal operands are not supported +// NOGFX89: :[[@LINE-3]]:19: error: literal operands are not supported +// GFX11: v_and_b32_e64 v0, 0x4d2, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xff,0x02,0x02,0x00,0xd2,0x04,0x00,0x00] +// NOSICIVI: :[[@LINE-1]]:19: error: literal operands are not supported -// SICI: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x04,0x80,0xbe,0xcf,0x2b,0xff,0xff] -// GFX89: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff] s_mov_b64_e32 s[0:1], -54321 +// SICI: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x04,0x80,0xbe,0xcf,0x2b,0xff,0xff] +// GFX89: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff] +// GFX11: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff] +// GFX12: s_mov_b64 s[0:1], 0xffff2bcf ; encoding: [0xff,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff] +// GFX1250: s_mov_b64 s[0:1], 0xffffffffffff2bcf ; encoding: [0xfe,0x01,0x80,0xbe,0xcf,0x2b,0xff,0xff,0xff,0xff,0xff,0xff] -// SICI: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff] -// GFX89: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x26,0xcf,0x2b,0xff,0xff] v_and_b32_e32 v0, -54321, v1 +// SICI: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff] +// GFX89: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x26,0xcf,0x2b,0xff,0xff] +// GFX12XX: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff] +// GFX11: v_and_b32_e32 v0, 0xffff2bcf, v1 ; encoding: [0xff,0x02,0x00,0x36,0xcf,0x2b,0xff,0xff] -// SICI: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x04,0x80,0xbe,0xef,0xbe,0xad,0xde] -// GFX89: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde] s_mov_b64_e32 s[0:1], 0xdeadbeef +// SICI: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x04,0x80,0xbe,0xef,0xbe,0xad,0xde] +// GFX89: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde] +// GFX11: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde] +// GFX12: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde] +// GFX1250: s_mov_b64 s[0:1], 0xdeadbeef ; encoding: [0xfe,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde,0x00,0x00,0x00,0x00] -// SICI: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde] -// GFX89: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x26,0xef,0xbe,0xad,0xde] v_and_b32_e32 v0, 0xdeadbeef, v1 +// SICI: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde] +// GFX89: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x26,0xef,0xbe,0xad,0xde] +// GFX12XX: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde] +// GFX11: v_and_b32_e32 v0, 0xdeadbeef, v1 ; encoding: [0xff,0x02,0x00,0x36,0xef,0xbe,0xad,0xde] -// SICI: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x04,0x80,0xbe,0xff,0xff,0xff,0xff] -// GFX89: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff] s_mov_b64_e32 s[0:1], 0xffffffff +// SICI: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x04,0x80,0xbe,0xff,0xff,0xff,0xff] +// GFX89: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff] +// GFX11: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff] +// GFX12: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff] +// GFX1250: s_mov_b64 s[0:1], 0xffffffff ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00] -// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26] v_and_b32_e32 v0, 0xffffffff, v1 +// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26] +// GFX12XX: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] +// GFX11: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b64_e32 s[0:1], 0x123456789abcdef0 +// NOSICI: :[[@LINE-1]]:23: error: invalid operand for instruction +// NOGFX89: :[[@LINE-2]]:23: error: invalid operand for instruction +// GFX1250: s_mov_b64 s[0:1], 0x123456789abcdef0 ; encoding: [0xfe,0x01,0x80,0xbe,0xf0,0xde,0xbc,0x9a,0x78,0x56,0x34,0x12] +// NOGFX11: :[[@LINE-4]]:23: error: invalid operand for instruction +// NOGFX12: :[[@LINE-5]]:23: error: invalid operand for instruction +// NOSICIVI: :[[@LINE-1]]:23: error: invalid operand for instruction -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_and_b32_e32 v0, 0x123456789abcdef0, v1 +// NOGCN: :[[@LINE-1]]:19: error: invalid operand for instruction -// SICI: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x04,0x80,0xbe] -// GFX89: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x01,0x80,0xbe] s_mov_b64_e32 s[0:1], 0xffffffffffffffff +// GFX8PLUS: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x01,0x80,0xbe] +// SICI: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x04,0x80,0xbe] -// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] -// GFX89: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26] v_and_b32_e32 v0, 0xffffffffffffffff, v1 +// SICI: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x26] +// GFX12XX: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] +// GFX11: v_and_b32_e32 v0, -1, v1 ; encoding: [0xc1,0x02,0x00,0x36] + +v_not_b16 v5.l, 1 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_not_b16_e32 v5.l, 1 ; encoding: [0x81,0xd2,0x0a,0x7e] +// GFX1250: v_not_b16_e32 v5.l, 1 ; encoding: [0x81,0xd2,0x0a,0x7e] +// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode + +v_not_b16 v5.l, lit(1) +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_not_b16_e32 v5.l, lit(0x1) ; encoding: [0xff,0xd2,0x0a,0x7e,0x01,0x00,0x00,0x00] +// GFX1250: v_not_b16_e32 v5.l, lit(0x1) ; encoding: [0xff,0xd2,0x0a,0x7e,0x01,0x00,0x00,0x00] +// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode + +s_mov_b64 s[0:1], 1 +// GFX8PLUS: s_mov_b64 s[0:1], 1 ; encoding: [0x81,0x01,0x80,0xbe] +// SICI: s_mov_b64 s[0:1], 1 ; encoding: [0x81,0x04,0x80,0xbe] + +s_mov_b64 s[0:1], lit(1) +// SICI: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x04,0x80,0xbe,0x01,0x00,0x00,0x00] +// GFX89: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00] +// GFX11: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00] +// GFX12: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00] +// GFX1250: s_mov_b64 s[0:1], lit(0x1) ; encoding: [0xfe,0x01,0x80,0xbe,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00] + +v_and_b32_e32 v0, 1, v1 +// SICI: v_and_b32_e32 v0, 1, v1 ; encoding: [0x81,0x02,0x00,0x36] +// GFX89: v_and_b32_e32 v0, 1, v1 ; encoding: [0x81,0x02,0x00,0x26] +// GFX12XX: v_and_b32_e32 v0, 1, v1 ; encoding: [0x81,0x02,0x00,0x36] +// GFX11: v_and_b32_e32 v0, 1, v1 ; encoding: [0x81,0x02,0x00,0x36] + +v_and_b32_e32 v0, lit(1), v1 +// SICI: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x36,0x01,0x00,0x00,0x00] +// GFX89: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x26,0x01,0x00,0x00,0x00] +// GFX12XX: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x36,0x01,0x00,0x00,0x00] +// GFX11: v_and_b32_e32 v0, lit(0x1), v1 ; encoding: [0xff,0x02,0x00,0x36,0x01,0x00,0x00,0x00] + +v_pk_add_u16 v5, exec_lo, 1 +// GFX12XX: v_pk_add_u16 v5, exec_lo, 1 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0x02,0x01,0x18] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX9: v_pk_add_u16 v5, exec_lo, 1 ; encoding: [0x05,0x40,0x8a,0xd3,0x7e,0x02,0x01,0x18] +// GFX11: v_pk_add_u16 v5, exec_lo, 1 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0x02,0x01,0x18] +// NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_pk_add_u16 v5, exec_lo, lit(1) +// GFX12XX: v_pk_add_u16 v5, exec_lo, lit(0x1) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x01,0x00,0x00,0x00] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_pk_add_u16 v5, exec_lo, lit(0x1) ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0xfe,0x01,0x18,0x01,0x00,0x00,0x00] +// NOVI: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX9: :[[@LINE-5]]:31: error: invalid operand (violates constant bus restrictions) + +v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], 1 ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0x06,0x02] +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU + +v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(1) +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX1250: v_perm_pk16_b6_u4 v[2:4], v4, v[4:5], lit(0x1) ; encoding: [0x02,0x00,0x42,0xd6,0x04,0x09,0xfe,0x03,0x01,0x00,0x00,0x00] +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU //---------------------------------------------------------------------------// // 1/(2*PI) //---------------------------------------------------------------------------// -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_trunc_f32_e32 v0, 0x3fc45f306dc9c882 +// NOGCN: :[[@LINE-1]]:21: error: invalid operand for instruction -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e] v_fract_f64_e32 v[0:1], 0x3fc45f306dc9c882 +// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e] +// NOSICI: :[[@LINE-3]]:25: error: invalid operand for instruction +// GFX11: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e] +// NOSICIVI: :[[@LINE-2]]:25: error: invalid operand for instruction -// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] -// GFX89: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x38,0x00,0x7e] v_trunc_f32_e32 v0, 0x3e22f983 +// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// GFX89: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x38,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x42,0x00,0x7e] +// GFX11: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x42,0x00,0x7e] -// SICI: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e] -// GFX89: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x64,0x00,0x7e,0x83,0xf9,0x22,0x3e] v_fract_f64_e32 v[0:1], 0x3e22f983 +// SICI: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// GFX89: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x64,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// GFX12XX: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// GFX11: v_fract_f64_e32 v[0:1], 0x3e22f983 ; encoding: [0xff,0x7c,0x00,0x7e,0x83,0xf9,0x22,0x3e] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_trunc_f32_e64 v0, 0x3fc45f306dc9c882 +// NOGCN: :[[@LINE-1]]:21: error: invalid operand for instruction -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX89: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00] v_fract_f64_e64 v[0:1], 0x3fc45f306dc9c882 +// GFX89: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0x72,0xd1,0xf8,0x00,0x00,0x00] +// GFX12XX: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0xbe,0xd5,0xf8,0x00,0x00,0x00] +// NOSICI: :[[@LINE-3]]:25: error: invalid operand for instruction +// GFX11: v_fract_f64_e64 v[0:1], 0.15915494309189532 ; encoding: [0x00,0x00,0xbe,0xd5,0xf8,0x00,0x00,0x00] +// NOSICIVI: :[[@LINE-2]]:25: error: invalid operand for instruction -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported -// GFX89: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00] v_trunc_f32_e64 v0, 0x3e22f983 +// GFX89: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0x5c,0xd1,0xf8,0x00,0x00,0x00] +// GFX12XX: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0xa1,0xd5,0xf8,0x00,0x00,0x00] +// NOSICI: :[[@LINE-3]]:21: error: literal operands are not supported +// GFX11: v_trunc_f32_e64 v0, 0.15915494 ; encoding: [0x00,0x00,0xa1,0xd5,0xf8,0x00,0x00,0x00] +// NOSICIVI: :[[@LINE-2]]:21: error: literal operands are not supported -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported v_fract_f64_e64 v[0:1], 0x3e22f983 +// GFX12XX: v_fract_f64_e64 v[0:1], 0x3e22f983 ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0x83,0xf9,0x22,0x3e] +// NOSICI: :[[@LINE-2]]:25: error: literal operands are not supported +// NOGFX89: :[[@LINE-3]]:25: error: literal operands are not supported +// GFX11: v_fract_f64_e64 v[0:1], 0x3e22f983 ; encoding: [0x00,0x00,0xbe,0xd5,0xff,0x00,0x00,0x00,0x83,0xf9,0x22,0x3e] +// NOSICIVI: :[[@LINE-1]]:25: error: literal operands are not supported -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX89: s_mov_b64 s[0:1], 0.15915494309189532 ; encoding: [0xf8,0x01,0x80,0xbe] s_mov_b64_e32 s[0:1], 0.159154943091895317852646485335 +// GFX8PLUS: s_mov_b64 s[0:1], 0.15915494309189532 ; encoding: [0xf8,0x01,0x80,0xbe] +// NOSICI: :[[@LINE-2]]:23: error: invalid operand for instruction +// NOSICIVI: :[[@LINE-2]]:23: error: invalid operand for instruction -// SICI: v_and_b32_e32 v0, 0x3e22f983, v1 ; encoding: [0xff,0x02,0x00,0x36,0x83,0xf9,0x22,0x3e] -// GFX89: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x26] v_and_b32_e32 v0, 0.159154943091895317852646485335, v1 +// SICI: v_and_b32_e32 v0, 0x3e22f983, v1 ; encoding: [0xff,0x02,0x00,0x36,0x83,0xf9,0x22,0x3e] +// GFX89: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x26] +// GFX12XX: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x36] +// GFX11: v_and_b32_e32 v0, 0.15915494, v1 ; encoding: [0xf8,0x02,0x00,0x36] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: literal operands are not supported -// GFX89: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00] v_and_b32_e64 v0, 0.159154943091895317852646485335, v1 +// GFX89: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x13,0xd1,0xf8,0x02,0x02,0x00] +// GFX12XX: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf8,0x02,0x02,0x00] +// NOSICI: :[[@LINE-3]]:19: error: literal operands are not supported +// GFX11: v_and_b32_e64 v0, 0.15915494, v1 ; encoding: [0x00,0x00,0x1b,0xd5,0xf8,0x02,0x02,0x00] +// NOSICIVI: :[[@LINE-2]]:19: error: literal operands are not supported -// SICI: v_fract_f64_e32 v[0:1], 0x3fc45f30 ; encoding: [0xff,0x7c,0x00,0x7e,0x30,0x5f,0xc4,0x3f] -// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e] v_fract_f64 v[0:1], 0.159154943091895317852646485335 +// SICI: v_fract_f64_e32 v[0:1], 0x3fc45f30 ; encoding: [0xff,0x7c,0x00,0x7e,0x30,0x5f,0xc4,0x3f] +// GFX89: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x64,0x00,0x7e] +// GFX12XX: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e] +// NOSICI: :[[@LINE-4]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero +// GFX11: v_fract_f64_e32 v[0:1], 0.15915494309189532 ; encoding: [0xf8,0x7c,0x00,0x7e] +// NOSICIVI: :[[@LINE-3]]:1: warning: Can't encode literal as exact 64-bit floating-point operand. Low 32-bits will be set to zero -// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] -// GFX89: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x38,0x00,0x7e] v_trunc_f32 v0, 0.159154943091895317852646485335 +// SICI: v_trunc_f32_e32 v0, 0x3e22f983 ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// GFX89: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x38,0x00,0x7e] +// GFX12XX: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x42,0x00,0x7e] +// GFX11: v_trunc_f32_e32 v0, 0.15915494 ; encoding: [0xf8,0x42,0x00,0x7e] + +v_trunc_f32 v0, lit(0.159154943091895317852646485335) +// SICI: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// GFX89: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x38,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// GFX12XX: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] +// GFX11: v_trunc_f32_e32 v0, lit(0x3e22f983) ; encoding: [0xff,0x42,0x00,0x7e,0x83,0xf9,0x22,0x3e] //---------------------------------------------------------------------------// // integer literal truncation checks //---------------------------------------------------------------------------// -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b32 s0, 0x101ffffffff +// NOGCN: :[[@LINE-1]]:15: error: invalid operand for instruction -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b32 s0, 0x1000000001 +// NOGCN: :[[@LINE-1]]:15: error: invalid operand for instruction -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b32 s0, 0x1000000fff +// NOGCN: :[[@LINE-1]]:15: error: invalid operand for instruction -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_trunc_f32 v0, 0x1fffffffff0 +// NOGCN: :[[@LINE-1]]:17: error: invalid operand for instruction -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_trunc_f32 v0, 0x100000001 +// NOGCN: :[[@LINE-1]]:17: error: invalid operand for instruction -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_trunc_f32 v0, 0x1fffffff000 +// NOGCN: :[[@LINE-1]]:17: error: invalid operand for instruction -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b64 s[0:1], 0x101ffffffff +// NOSICI: :[[@LINE-1]]:19: error: invalid operand for instruction +// NOGFX89: :[[@LINE-2]]:19: error: invalid operand for instruction +// GFX1250: s_mov_b64 s[0:1], 0x101ffffffff ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0xff,0xff,0xff,0x01,0x01,0x00,0x00] +// NOGFX11: :[[@LINE-4]]:19: error: invalid operand for instruction +// NOGFX12: :[[@LINE-5]]:19: error: invalid operand for instruction +// NOSICIVI: :[[@LINE-1]]:19: error: invalid operand for instruction -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b64 s[0:1], 0x1000000001 +// NOSICI: :[[@LINE-1]]:19: error: invalid operand for instruction +// NOGFX89: :[[@LINE-2]]:19: error: invalid operand for instruction +// GFX1250: s_mov_b64 s[0:1], 0x1000000001 ; encoding: [0xfe,0x01,0x80,0xbe,0x01,0x00,0x00,0x00,0x10,0x00,0x00,0x00] +// NOGFX11: :[[@LINE-4]]:19: error: invalid operand for instruction +// NOGFX12: :[[@LINE-5]]:19: error: invalid operand for instruction +// NOSICIVI: :[[@LINE-1]]:19: error: invalid operand for instruction -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction s_mov_b64 s[0:1], 0x1000000fff +// NOSICI: :[[@LINE-1]]:19: error: invalid operand for instruction +// NOGFX89: :[[@LINE-2]]:19: error: invalid operand for instruction +// GFX1250: s_mov_b64 s[0:1], 0x1000000fff ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0x0f,0x00,0x00,0x10,0x00,0x00,0x00] +// NOGFX11: :[[@LINE-4]]:19: error: invalid operand for instruction +// NOGFX12: :[[@LINE-5]]:19: error: invalid operand for instruction +// NOSICIVI: :[[@LINE-1]]:19: error: invalid operand for instruction -// NOGFX89: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction -// NOSI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOCIVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_trunc_f64 v[0:1], 0x1fffffffff0 +// NOGFX89: :[[@LINE-1]]:21: error: invalid operand for instruction +// GFX1250: v_trunc_f64_e32 v[0:1], 0x1fffffffff0 ; encoding: [0xfe,0x2e,0x00,0x7e,0xf0,0xff,0xff,0xff,0xff,0x01,0x00,0x00] +// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOCI: :[[@LINE-4]]:21: error: invalid operand for instruction +// NOGFX11: :[[@LINE-5]]:21: error: invalid operand for instruction +// NOGFX12: :[[@LINE-6]]:21: error: invalid operand for instruction +// NOCIVI: :[[@LINE-4]]:21: error: invalid operand for instruction -// NOGFX89: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction -// NOSI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOCIVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_trunc_f64 v[0:1], 0x100000001 +// NOGFX89: :[[@LINE-1]]:21: error: invalid operand for instruction +// GFX1250: v_trunc_f64_e32 v[0:1], 0x100000001 ; encoding: [0xfe,0x2e,0x00,0x7e,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00] +// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOCI: :[[@LINE-4]]:21: error: invalid operand for instruction +// NOGFX11: :[[@LINE-5]]:21: error: invalid operand for instruction +// NOGFX12: :[[@LINE-6]]:21: error: invalid operand for instruction +// NOCIVI: :[[@LINE-4]]:21: error: invalid operand for instruction -// NOGFX89: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand for instruction -// NOSI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOCIVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_trunc_f64 v[0:1], 0x1fffffff000 +// NOGFX89: :[[@LINE-1]]:21: error: invalid operand for instruction +// GFX1250: v_trunc_f64_e32 v[0:1], 0x1fffffff000 ; encoding: [0xfe,0x2e,0x00,0x7e,0x00,0xf0,0xff,0xff,0xff,0x01,0x00,0x00] +// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOCI: :[[@LINE-4]]:21: error: invalid operand for instruction +// NOGFX11: :[[@LINE-5]]:21: error: invalid operand for instruction +// NOGFX12: :[[@LINE-6]]:21: error: invalid operand for instruction +// NOCIVI: :[[@LINE-4]]:21: error: invalid operand for instruction //---------------------------------------------------------------------------// // named inline values: scc, vccz, execz //---------------------------------------------------------------------------// +buffer_atomic_add v0, off, s[0:3], scc offset:4095 // SICI: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0xc8,0xe0,0x00,0x00,0x00,0xfd] // GFX89: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xfd] -buffer_atomic_add v0, off, s[0:3], scc offset:4095 +// GFX12XX: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0x7d,0x40,0x0d,0xc4,0x00,0x00,0x80,0x00,0x00,0xff,0x0f,0x00] +// GFX11: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0xd4,0xe0,0x00,0x00,0x00,0xfd] -// SICI: s_add_i32 s0, src_vccz, s0 ; encoding: [0xfb,0x00,0x00,0x81] -// GFX89: s_add_i32 s0, src_vccz, s0 ; encoding: [0xfb,0x00,0x00,0x81] s_add_i32 s0, vccz, s0 +// SICI: s_add_i32 s0, src_vccz, s0 ; encoding: [0xfb,0x00,0x00,0x81] +// GFX89: s_add_i32 s0, src_vccz, s0 ; encoding: [0xfb,0x00,0x00,0x81] +// NOGFX11: :[[@LINE-3]]:15: error: src_vccz register not available on this GPU +// NOGFX12: :[[@LINE-4]]:15: error: src_vccz register not available on this GPU +// NOGFX1250: :[[@LINE-5]]:15: error: src_vccz register not available on this GPU -// SICI: s_add_i32 s0, src_execz, s0 ; encoding: [0xfc,0x00,0x00,0x81] -// GFX89: s_add_i32 s0, src_execz, s0 ; encoding: [0xfc,0x00,0x00,0x81] s_add_i32 s0, execz, s0 +// SICI: s_add_i32 s0, src_execz, s0 ; encoding: [0xfc,0x00,0x00,0x81] +// GFX89: s_add_i32 s0, src_execz, s0 ; encoding: [0xfc,0x00,0x00,0x81] +// NOGFX11: :[[@LINE-3]]:15: error: src_execz register not available on this GPU +// NOGFX12: :[[@LINE-4]]:15: error: src_execz register not available on this GPU +// NOGFX1250: :[[@LINE-5]]:15: error: src_execz register not available on this GPU -// SICI: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81] -// GFX89: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81] s_add_i32 s0, scc, s0 +// SICI: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81] +// GFX89: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81] +// GFX12XX: s_add_co_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81] +// GFX11: s_add_i32 s0, src_scc, s0 ; encoding: [0xfd,0x00,0x00,0x81] -// SICI: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x87] -// GFX89: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_vccz +// SICI: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x87] +// GFX89: s_and_b64 s[0:1], s[0:1], src_vccz ; encoding: [0x00,0xfb,0x80,0x86] +// NOGFX11: :[[@LINE-3]]:27: error: src_vccz register not available on this GPU +// NOGFX12: :[[@LINE-4]]:27: error: src_vccz register not available on this GPU +// NOGFX1250: :[[@LINE-5]]:27: error: src_vccz register not available on this GPU -// SICI: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x87] -// GFX89: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_execz +// SICI: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x87] +// GFX89: s_and_b64 s[0:1], s[0:1], src_execz ; encoding: [0x00,0xfc,0x80,0x86] +// NOGFX11: :[[@LINE-3]]:27: error: src_execz register not available on this GPU +// NOGFX12: :[[@LINE-4]]:27: error: src_execz register not available on this GPU +// NOGFX1250: :[[@LINE-5]]:27: error: src_execz register not available on this GPU -// SICI: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x87] -// GFX89: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_scc +// SICI: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x87] +// GFX89: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x86] +// GFX12XX: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x8b] +// GFX11: s_and_b64 s[0:1], s[0:1], src_scc ; encoding: [0x00,0xfd,0x80,0x8b] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX89: v_add_u16_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x4c] v_add_u16 v0, vccz, v0 +// GFX89: v_add_u16_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x4c] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU -// NOSICI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX9: v_add_u16_sdwa v0, src_scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xfd,0x06,0x86,0x06] v_add_u16_sdwa v0, scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX9: v_add_u16_sdwa v0, src_scc, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xfd,0x06,0x86,0x06] +// NOVI: :[[@LINE-3]]:20: error: invalid operand for instruction +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOSICI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX9: v_add_u16_sdwa v0, v0, src_scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xfa,0x01,0x4c,0x00,0x06,0x06,0x86] v_add_u16_sdwa v0, v0, scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX9: v_add_u16_sdwa v0, v0, src_scc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xfa,0x01,0x4c,0x00,0x06,0x06,0x86] +// NOVI: :[[@LINE-3]]:24: error: invalid operand for instruction +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX9: v_add_u32_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x68] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_add_u32 v0, execz, v0 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX9: v_add_u32_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x68] +// NOVI: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode +// NOGFX11: :[[@LINE-4]]:15: error: src_execz register not available on this GPU +// NOGFX12: :[[@LINE-5]]:15: error: src_execz register not available on this GPU +// NOGFX1250: :[[@LINE-6]]:15: error: src_execz register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX9: v_add_u32_e64 v0, src_scc, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xfd,0x00,0x02,0x00] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_add_u32_e64 v0, scc, v0 +// GFX12XX: v_add_nc_u32_e64 v0, src_scc, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x02,0x00] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX9: v_add_u32_e64 v0, src_scc, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xfd,0x00,0x02,0x00] +// GFX11: v_add_nc_u32_e64 v0, src_scc, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x02,0x00] +// NOVI: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// SICI: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0x44,0x7d] -// GFX89: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0xc4,0x7d] v_cmp_eq_i64 vcc, scc, v[0:1] +// SICI: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0x44,0x7d] +// GFX89: v_cmp_eq_i64_e32 vcc, src_scc, v[0:1] ; encoding: [0xfd,0x00,0xc4,0x7d] +// NOGFX11: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode +// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX89: v_max_f16_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x5a] v_max_f16 v0, execz, v0 +// GFX89: v_max_f16_e32 v0, src_execz, v0 ; encoding: [0xfc,0x00,0x00,0x5a] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-3]]:15: error: src_execz register not available on this GPU +// NOGFX12: :[[@LINE-4]]:15: error: src_execz register not available on this GPU +// NOGFX1250: :[[@LINE-5]]:15: error: src_execz register not available on this GPU +// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU -// SICI: v_max_f32_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x20] -// GFX89: v_max_f32_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x16] v_max_f32 v0, vccz, v0 +// SICI: v_max_f32_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x20] +// GFX89: v_max_f32_e32 v0, src_vccz, v0 ; encoding: [0xfb,0x00,0x00,0x16] +// NOGFX11: :[[@LINE-3]]:15: error: src_vccz register not available on this GPU +// NOGFX12: :[[@LINE-4]]:15: error: src_vccz register not available on this GPU +// NOGFX1250: :[[@LINE-5]]:15: error: src_vccz register not available on this GPU -// SICI: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0xce,0xd2,0xfd,0x00,0x02,0x00] -// GFX89: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xfd,0x00,0x02,0x00] v_max_f64 v[0:1], scc, v[0:1] +// SICI: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0xce,0xd2,0xfd,0x00,0x02,0x00] +// GFX89: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xfd,0x00,0x02,0x00] +// GFX12XX: v_max_num_f64_e32 v[0:1], src_scc, v[0:1] ; encoding: [0xfd,0x00,0x00,0x1c] +// GFX11: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0x2a,0xd7,0xfd,0x00,0x02,0x00] -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX9: v_pk_add_f16 v0, src_execz, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xfc,0x00,0x02,0x18] v_pk_add_f16 v0, execz, v0 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX9: v_pk_add_f16 v0, src_execz, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xfc,0x00,0x02,0x18] +// NOVI: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-4]]:18: error: src_execz register not available on this GPU +// NOGFX12: :[[@LINE-5]]:18: error: src_execz register not available on this GPU +// NOGFX1250: :[[@LINE-6]]:18: error: src_execz register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX89: v_ceil_f16_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x85,0xd1,0xfb,0x00,0x00,0x20] v_ceil_f16 v0, neg(vccz) +// GFX89: v_ceil_f16_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x85,0xd1,0xfb,0x00,0x00,0x20] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-3]]:20: error: src_vccz register not available on this GPU +// NOGFX12: :[[@LINE-4]]:20: error: src_vccz register not available on this GPU +// NOGFX1250: :[[@LINE-5]]:20: error: src_vccz register not available on this GPU +// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX89: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0x85,0xd1,0xfd,0x00,0x00,0x00] v_ceil_f16 v0, abs(scc) +// GFX89: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0x85,0xd1,0xfd,0x00,0x00,0x00] +// GFX12XX: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00] +// NOSICI: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// GFX11: v_ceil_f16_e64 v0, |src_scc| ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00] +// NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU -// NOSI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU -// CI: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x30,0xd3,0xfc,0x00,0x00,0x00] -// GFX89: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x58,0xd1,0xfc,0x00,0x00,0x00] v_ceil_f64 v[5:6], |execz| +// GFX89: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x58,0xd1,0xfc,0x00,0x00,0x00] +// CI: v_ceil_f64_e64 v[5:6], |src_execz| ; encoding: [0x05,0x01,0x30,0xd3,0xfc,0x00,0x00,0x00] +// NOSI: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX11: :[[@LINE-4]]:21: error: src_execz register not available on this GPU +// NOGFX12: :[[@LINE-5]]:21: error: src_execz register not available on this GPU +// NOGFX1250: :[[@LINE-6]]:21: error: src_execz register not available on this GPU -// NOSI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU -// CI: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x30,0xd3,0x6a,0x00,0x00,0x20] -// GFX89: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x58,0xd1,0x6a,0x00,0x00,0x20] v_ceil_f64 v[5:6], -vcc +// GFX89: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x58,0xd1,0x6a,0x00,0x00,0x20] +// CI: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x30,0xd3,0x6a,0x00,0x00,0x20] +// GFX11: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x20] +// GFX12: v_ceil_f64_e64 v[5:6], -vcc ; encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x20] +// NOSI: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-6]]:12: error: invalid operand for instruction -// SICI: v_ceil_f32_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x44,0xd3,0xfb,0x00,0x00,0x20] -// GFX89: v_ceil_f32_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x5d,0xd1,0xfb,0x00,0x00,0x20] v_ceil_f32 v0, -vccz +// SICI: v_ceil_f32_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x44,0xd3,0xfb,0x00,0x00,0x20] +// GFX89: v_ceil_f32_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x5d,0xd1,0xfb,0x00,0x00,0x20] +// NOGFX11: :[[@LINE-3]]:17: error: src_vccz register not available on this GPU +// NOGFX12: :[[@LINE-4]]:17: error: src_vccz register not available on this GPU +// NOGFX1250: :[[@LINE-5]]:17: error: src_vccz register not available on this GPU -// SICI: v_ceil_f32_e64 v0, |src_execz| ; encoding: [0x00,0x01,0x44,0xd3,0xfc,0x00,0x00,0x00] -// GFX89: v_ceil_f32_e64 v0, |src_execz| ; encoding: [0x00,0x01,0x5d,0xd1,0xfc,0x00,0x00,0x00] v_ceil_f32 v0, |execz| +// SICI: v_ceil_f32_e64 v0, |src_execz| ; encoding: [0x00,0x01,0x44,0xd3,0xfc,0x00,0x00,0x00] +// GFX89: v_ceil_f32_e64 v0, |src_execz| ; encoding: [0x00,0x01,0x5d,0xd1,0xfc,0x00,0x00,0x00] +// NOGFX11: :[[@LINE-3]]:17: error: src_execz register not available on this GPU +// NOGFX12: :[[@LINE-4]]:17: error: src_execz register not available on this GPU +// NOGFX1250: :[[@LINE-5]]:17: error: src_execz register not available on this GPU -// NOSICI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX9: v_ceil_f16_sdwa v5, |src_vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfb,0x16,0xa6,0x00] v_ceil_f16_sdwa v5, |vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX9: v_ceil_f16_sdwa v5, |src_vccz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfb,0x16,0xa6,0x00] +// NOVI: :[[@LINE-3]]:22: error: invalid operand for instruction +// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported +// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported +// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOSICI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// GFX9: v_ceil_f16_sdwa v5, -src_scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfd,0x16,0x96,0x00] v_ceil_f16_sdwa v5, -scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX9: v_ceil_f16_sdwa v5, -src_scc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xfd,0x16,0x96,0x00] +// NOVI: :[[@LINE-3]]:22: error: invalid operand for instruction +// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported +// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported +// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX9: v_ceil_f32_sdwa v5, src_vccz dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfb,0x16,0x86,0x00] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_ceil_f32_sdwa v5, vccz dst_sel:DWORD src0_sel:DWORD +// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported +// GFX9: v_ceil_f32_sdwa v5, src_vccz dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfb,0x16,0x86,0x00] +// NOVI: :[[@LINE-3]]:21: error: invalid operand for instruction +// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported +// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported +// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported +// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported -// GFX9: v_ceil_f32_sdwa v5, |src_execz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfc,0x16,0xa6,0x00] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction v_ceil_f32_sdwa v5, |execz| dst_sel:DWORD src0_sel:DWORD +// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported +// GFX9: v_ceil_f32_sdwa v5, |src_execz| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xfc,0x16,0xa6,0x00] +// NOVI: :[[@LINE-3]]:22: error: invalid operand for instruction +// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported +// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported +// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported +// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported //---------------------------------------------------------------------------// // named inline values: shared_base, shared_limit, private_base, etc //---------------------------------------------------------------------------// -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU -// GFX9: buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xeb] buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 +// NOSICI: :[[@LINE-1]]:36: error: src_shared_base register not available on this GPU +// GFX9: buffer_atomic_add v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xeb] +// GFX11: buffer_atomic_add_u32 v0, off, s[0:3], src_shared_base offset:4095 ; encoding: [0xff,0x0f,0xd4,0xe0,0x00,0x00,0x00,0xeb] +// NOVI: :[[@LINE-4]]:36: error: src_shared_base register not available on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-6]]:1: error: operands are not valid for this GPU or mode +// NOSICIVI: :[[@LINE-1]]:36: error: src_shared_base register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU -// GFX9: s_add_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81] s_add_i32 s0, src_shared_base, s0 +// GFX12XX: s_add_co_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81] +// NOSICI: :[[@LINE-2]]:15: error: src_shared_base register not available on this GPU +// GFX9: s_add_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81] +// GFX11: s_add_i32 s0, src_shared_base, s0 ; encoding: [0xeb,0x00,0x00,0x81] +// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:15: error: src_shared_base register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_limit register not available on this GPU -// GFX9: s_add_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81] s_add_i32 s0, src_shared_limit, s0 +// GFX12XX: s_add_co_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81] +// NOSICI: :[[@LINE-2]]:15: error: src_shared_limit register not available on this GPU +// GFX9: s_add_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81] +// GFX11: s_add_i32 s0, src_shared_limit, s0 ; encoding: [0xec,0x00,0x00,0x81] +// NOVI: :[[@LINE-5]]:15: error: src_shared_limit register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:15: error: src_shared_limit register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_base register not available on this GPU -// GFX9: s_add_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81] s_add_i32 s0, src_private_base, s0 +// GFX12XX: s_add_co_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81] +// NOSICI: :[[@LINE-2]]:15: error: src_private_base register not available on this GPU +// GFX9: s_add_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81] +// GFX11: s_add_i32 s0, src_private_base, s0 ; encoding: [0xed,0x00,0x00,0x81] +// NOVI: :[[@LINE-5]]:15: error: src_private_base register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:15: error: src_private_base register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_limit register not available on this GPU -// GFX9: s_add_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81] s_add_i32 s0, src_private_limit, s0 +// GFX12XX: s_add_co_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81] +// NOSICI: :[[@LINE-2]]:15: error: src_private_limit register not available on this GPU +// GFX9: s_add_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81] +// GFX11: s_add_i32 s0, src_private_limit, s0 ; encoding: [0xee,0x00,0x00,0x81] +// NOVI: :[[@LINE-5]]:15: error: src_private_limit register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:15: error: src_private_limit register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_pops_exiting_wave_id register not available on this GPU -// GFX9: s_add_i32 s0, src_pops_exiting_wave_id, s0 ; encoding: [0xef,0x00,0x00,0x81] s_add_i32 s0, src_pops_exiting_wave_id, s0 +// NOSICI: :[[@LINE-1]]:15: error: src_pops_exiting_wave_id register not available on this GPU +// GFX9: s_add_i32 s0, src_pops_exiting_wave_id, s0 ; encoding: [0xef,0x00,0x00,0x81] +// NOVI: :[[@LINE-3]]:15: error: src_pops_exiting_wave_id register not available on this GPU +// NOGFX11: :[[@LINE-4]]:15: error: src_pops_exiting_wave_id register not available on this GPU +// NOGFX12: :[[@LINE-5]]:15: error: src_pops_exiting_wave_id register not available on this GPU +// NOGFX1250: :[[@LINE-6]]:15: error: src_pops_exiting_wave_id register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:15: error: src_pops_exiting_wave_id register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU -// GFX9: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_shared_base +// GFX12XX: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x8b] +// NOSICI: :[[@LINE-2]]:27: error: src_shared_base register not available on this GPU +// GFX9: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x86] +// GFX11: s_and_b64 s[0:1], s[0:1], src_shared_base ; encoding: [0x00,0xeb,0x80,0x8b] +// NOVI: :[[@LINE-5]]:27: error: src_shared_base register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:27: error: src_shared_base register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_limit register not available on this GPU -// GFX9: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_shared_limit +// GFX12XX: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x8b] +// NOSICI: :[[@LINE-2]]:27: error: src_shared_limit register not available on this GPU +// GFX9: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x86] +// GFX11: s_and_b64 s[0:1], s[0:1], src_shared_limit ; encoding: [0x00,0xec,0x80,0x8b] +// NOVI: :[[@LINE-5]]:27: error: src_shared_limit register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:27: error: src_shared_limit register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_base register not available on this GPU -// GFX9: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_private_base +// GFX12XX: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x8b] +// NOSICI: :[[@LINE-2]]:27: error: src_private_base register not available on this GPU +// GFX9: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x86] +// GFX11: s_and_b64 s[0:1], s[0:1], src_private_base ; encoding: [0x00,0xed,0x80,0x8b] +// NOVI: :[[@LINE-5]]:27: error: src_private_base register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:27: error: src_private_base register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_limit register not available on this GPU -// GFX9: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_private_limit +// GFX12XX: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x8b] +// NOSICI: :[[@LINE-2]]:27: error: src_private_limit register not available on this GPU +// GFX9: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x86] +// GFX11: s_and_b64 s[0:1], s[0:1], src_private_limit ; encoding: [0x00,0xee,0x80,0x8b] +// NOVI: :[[@LINE-5]]:27: error: src_private_limit register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:27: error: src_private_limit register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_pops_exiting_wave_id register not available on this GPU -// GFX9: s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id ; encoding: [0x00,0xef,0x80,0x86] s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id +// NOSICI: :[[@LINE-1]]:27: error: src_pops_exiting_wave_id register not available on this GPU +// GFX9: s_and_b64 s[0:1], s[0:1], src_pops_exiting_wave_id ; encoding: [0x00,0xef,0x80,0x86] +// NOVI: :[[@LINE-3]]:27: error: src_pops_exiting_wave_id register not available on this GPU +// NOGFX11: :[[@LINE-4]]:27: error: src_pops_exiting_wave_id register not available on this GPU +// NOGFX12: :[[@LINE-5]]:27: error: src_pops_exiting_wave_id register not available on this GPU +// NOGFX1250: :[[@LINE-6]]:27: error: src_pops_exiting_wave_id register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:27: error: src_pops_exiting_wave_id register not available on this GPU -// GFX9: v_add_u16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4c] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU v_add_u16 v0, src_shared_base, v0 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX9: v_add_u16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4c] +// NOVI: :[[@LINE-3]]:15: error: src_shared_base register not available on this GPU +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX9: v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xeb,0x06,0x86,0x06] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX9: v_add_u16_sdwa v0, src_shared_base, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x00,0x00,0x4c,0xeb,0x06,0x86,0x06] +// NOVI: :[[@LINE-3]]:20: error: src_shared_base register not available on this GPU +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX9: v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xd6,0x01,0x4c,0x00,0x06,0x06,0x86] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX9: v_add_u16_sdwa v0, v0, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0xd6,0x01,0x4c,0x00,0x06,0x06,0x86] +// NOVI: :[[@LINE-3]]:24: error: src_shared_base register not available on this GPU +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX9: v_add_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x68] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU v_add_u32 v0, src_shared_base, v0 +// GFX12XX: v_add_nc_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4a] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX9: v_add_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x68] +// GFX11: v_add_nc_u32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x4a] +// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX9: v_add_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xeb,0x00,0x02,0x00] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU v_add_u32_e64 v0, src_shared_base, v0 +// GFX12XX: v_add_nc_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xeb,0x00,0x02,0x00] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX9: v_add_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x34,0xd1,0xeb,0x00,0x02,0x00] +// GFX11: v_add_nc_u32_e64 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x25,0xd5,0xeb,0x00,0x02,0x00] +// NOVI: :[[@LINE-5]]:19: error: src_shared_base register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU -// GFX9: v_cmp_eq_i64_e32 vcc, src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0xc4,0x7d] v_cmp_eq_i64 vcc, src_shared_base, v[0:1] +// NOSICI: :[[@LINE-1]]:19: error: src_shared_base register not available on this GPU +// GFX9: v_cmp_eq_i64_e32 vcc, src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0xc4,0x7d] +// NOVI: :[[@LINE-3]]:19: error: src_shared_base register not available on this GPU +// NOGFX11: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode +// NOGFX12: :[[@LINE-5]]:1: error: operands are not valid for this GPU or mode +// NOGFX1250: :[[@LINE-6]]:1: error: operands are not valid for this GPU or mode +// NOSICIVI: :[[@LINE-1]]:19: error: src_shared_base register not available on this GPU -// GFX9: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x5a] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU v_max_f16 v0, src_shared_base, v0 +// GFX12XX: v_max_num_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x62] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX9: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x5a] +// GFX11: v_max_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x72] +// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU -// GFX9: v_max_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x16] v_max_f32 v0, src_shared_base, v0 +// GFX12XX: v_max_num_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x2c] +// NOSICI: :[[@LINE-2]]:15: error: src_shared_base register not available on this GPU +// GFX9: v_max_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x16] +// GFX11: v_max_f32_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x20] +// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:15: error: src_shared_base register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU -// GFX9: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xeb,0x00,0x02,0x00] v_max_f64 v[0:1], src_shared_base, v[0:1] +// GFX12XX: v_max_num_f64_e32 v[0:1], src_shared_base, v[0:1] ; encoding: [0xeb,0x00,0x00,0x1c] +// NOSICI: :[[@LINE-2]]:19: error: src_shared_base register not available on this GPU +// GFX9: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xeb,0x00,0x02,0x00] +// GFX11: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: [0x00,0x00,0x2a,0xd7,0xeb,0x00,0x02,0x00] +// NOVI: :[[@LINE-5]]:19: error: src_shared_base register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:19: error: src_shared_base register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// GFX9: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xeb,0x00,0x02,0x18] v_pk_add_f16 v0, src_shared_base, v0 +// GFX12XX: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x0f,0xcc,0xeb,0x00,0x02,0x18] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX9: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xeb,0x00,0x02,0x18] +// GFX11: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x0f,0xcc,0xeb,0x00,0x02,0x18] +// NOVI: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX9: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU v_ceil_f16 v0, neg(src_shared_base) +// GFX12XX: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX9: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20] +// GFX11: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20] +// NOVI: :[[@LINE-5]]:20: error: src_shared_base register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX9: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x85,0xd1,0xeb,0x00,0x00,0x00] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU v_ceil_f16 v0, abs(src_shared_base) +// GFX12XX: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX9: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x85,0xd1,0xeb,0x00,0x00,0x00] +// GFX11: v_ceil_f16_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00] +// NOVI: :[[@LINE-5]]:20: error: src_shared_base register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX9: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x58,0xd1,0xeb,0x00,0x00,0x00] -// NOSI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOCIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU v_ceil_f64 v[5:6], |src_shared_base| +// GFX9: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x58,0xd1,0xeb,0x00,0x00,0x00] +// GFX11: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x98,0xd5,0xeb,0x00,0x00,0x00] +// GFX12: v_ceil_f64_e64 v[5:6], |src_shared_base| ; encoding: [0x05,0x01,0x98,0xd5,0xeb,0x00,0x00,0x00] +// NOSI: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOCI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU +// NOVI: :[[@LINE-6]]:21: error: src_shared_base register not available on this GPU +// NOGFX1250: :[[@LINE-7]]:12: error: invalid operand for instruction +// NOCIVI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU -// GFX9: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x58,0xd1,0xeb,0x00,0x00,0x20] -// NOSI: :[[@LINE+3]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOCIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU v_ceil_f64 v[5:6], -src_shared_base +// GFX9: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x58,0xd1,0xeb,0x00,0x00,0x20] +// GFX11: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x98,0xd5,0xeb,0x00,0x00,0x20] +// GFX12: v_ceil_f64_e64 v[5:6], -src_shared_base ; encoding: [0x05,0x00,0x98,0xd5,0xeb,0x00,0x00,0x20] +// NOSI: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOCI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU +// NOVI: :[[@LINE-6]]:21: error: src_shared_base register not available on this GPU +// NOGFX1250: :[[@LINE-7]]:12: error: invalid operand for instruction +// NOCIVI: :[[@LINE-5]]:21: error: src_shared_base register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU -// GFX9: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x5d,0xd1,0xeb,0x00,0x00,0x20] v_ceil_f32 v0, -src_shared_base +// GFX12XX: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xa2,0xd5,0xeb,0x00,0x00,0x20] +// NOSICI: :[[@LINE-2]]:17: error: src_shared_base register not available on this GPU +// GFX9: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x5d,0xd1,0xeb,0x00,0x00,0x20] +// GFX11: v_ceil_f32_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0xa2,0xd5,0xeb,0x00,0x00,0x20] +// NOVI: :[[@LINE-5]]:17: error: src_shared_base register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU -// GFX9: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x5d,0xd1,0xeb,0x00,0x00,0x00] v_ceil_f32 v0, |src_shared_base| +// GFX12XX: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xa2,0xd5,0xeb,0x00,0x00,0x00] +// NOSICI: :[[@LINE-2]]:17: error: src_shared_base register not available on this GPU +// GFX9: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0x5d,0xd1,0xeb,0x00,0x00,0x00] +// GFX11: v_ceil_f32_e64 v0, |src_shared_base| ; encoding: [0x00,0x01,0xa2,0xd5,0xeb,0x00,0x00,0x00] +// NOVI: :[[@LINE-5]]:17: error: src_shared_base register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU -// GFX9: v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0xa6,0x00] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX9: v_ceil_f16_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0xa6,0x00] +// NOVI: :[[@LINE-3]]:22: error: src_shared_base register not available on this GPU +// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported +// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported +// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX9: v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0x96,0x00] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// GFX9: v_ceil_f16_sdwa v5, -src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x8a,0x0a,0x7e,0xeb,0x16,0x96,0x00] +// NOVI: :[[@LINE-3]]:22: error: src_shared_base register not available on this GPU +// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported +// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported +// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// GFX9: v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0x86,0x00] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD src0_sel:DWORD +// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported +// GFX9: v_ceil_f32_sdwa v5, src_shared_base dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0x86,0x00] +// NOVI: :[[@LINE-3]]:21: error: src_shared_base register not available on this GPU +// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported +// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported +// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported +// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported -// GFX9: v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0xa6,0x00] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: sdwa variant of this instruction is not supported -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD src0_sel:DWORD +// NOSICI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported +// GFX9: v_ceil_f32_sdwa v5, |src_shared_base| dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD ; encoding: [0xf9,0x3a,0x0a,0x7e,0xeb,0x16,0xa6,0x00] +// NOVI: :[[@LINE-3]]:22: error: src_shared_base register not available on this GPU +// NOGFX11: :[[@LINE-4]]:1: error: sdwa variant of this instruction is not supported +// NOGFX12: :[[@LINE-5]]:1: error: sdwa variant of this instruction is not supported +// NOGFX1250: :[[@LINE-6]]:1: error: sdwa variant of this instruction is not supported +// NOSICIVI: :[[@LINE-1]]:1: error: sdwa variant of this instruction is not supported //---------------------------------------------------------------------------// // named inline values compete with other scalars for constant bus access //---------------------------------------------------------------------------// -// NOGFX9: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: src_private_base register not available on this GPU v_add_u32 v0, private_base, s0 +// GFX12XX: v_add_nc_u32_e64 v0, src_private_base, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xed,0x00,0x00,0x00] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_add_nc_u32_e64 v0, src_private_base, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xed,0x00,0x00,0x00] +// NOVI: :[[@LINE-4]]:15: error: src_private_base register not available on this GPU +// NOGFX9: :[[@LINE-5]]:29: error: invalid operand (violates constant bus restrictions) +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOGFX9: :[[@LINE+3]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOVI: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_add_u32 v0, scc, s0 +// GFX12XX: v_add_nc_u32_e64 v0, src_scc, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x00,0x00] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_add_nc_u32_e64 v0, src_scc, s0 ; encoding: [0x00,0x00,0x25,0xd5,0xfd,0x00,0x00,0x00] +// NOVI: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode +// NOGFX9: :[[@LINE-5]]:20: error: invalid operand (violates constant bus restrictions) +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU // v_div_fmas implicitly reads VCC -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU -// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, shared_base, v0, v1 +// GFX12XX: v_div_fmas_f32 v0, src_shared_base, v0, v1 ; encoding: [0x00,0x00,0x37,0xd6,0xeb,0x00,0x06,0x04] +// NOSICI: :[[@LINE-2]]:20: error: src_shared_base register not available on this GPU +// GFX11: v_div_fmas_f32 v0, src_shared_base, v0, v1 ; encoding: [0x00,0x00,0x37,0xd6,0xeb,0x00,0x06,0x04] +// NOVI: :[[@LINE-4]]:20: error: src_shared_base register not available on this GPU +// NOGFX9: :[[@LINE-5]]:20: error: invalid operand (violates constant bus restrictions) +// NOSICIVI: :[[@LINE-1]]:20: error: src_shared_base register not available on this GPU // v_div_fmas implicitly reads VCC -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_limit register not available on this GPU -// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, v0, shared_limit, v1 +// GFX12XX: v_div_fmas_f32 v0, v0, src_shared_limit, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xd9,0x05,0x04] +// NOSICI: :[[@LINE-2]]:24: error: src_shared_limit register not available on this GPU +// GFX11: v_div_fmas_f32 v0, v0, src_shared_limit, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xd9,0x05,0x04] +// NOVI: :[[@LINE-4]]:24: error: src_shared_limit register not available on this GPU +// NOGFX9: :[[@LINE-5]]:24: error: invalid operand (violates constant bus restrictions) +// NOSICIVI: :[[@LINE-1]]:24: error: src_shared_limit register not available on this GPU // v_div_fmas implicitly reads VCC -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_limit register not available on this GPU -// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, v0, v1, private_limit +// GFX12XX: v_div_fmas_f32 v0, v0, v1, src_private_limit ; encoding: [0x00,0x00,0x37,0xd6,0x00,0x03,0xba,0x03] +// NOSICI: :[[@LINE-2]]:28: error: src_private_limit register not available on this GPU +// GFX11: v_div_fmas_f32 v0, v0, v1, src_private_limit ; encoding: [0x00,0x00,0x37,0xd6,0x00,0x03,0xba,0x03] +// NOVI: :[[@LINE-4]]:28: error: src_private_limit register not available on this GPU +// NOGFX9: :[[@LINE-5]]:28: error: invalid operand (violates constant bus restrictions) +// NOSICIVI: :[[@LINE-1]]:28: error: src_private_limit register not available on this GPU // v_div_fmas implicitly reads VCC -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, execz, v0, v1 +// NOSICI: :[[@LINE-1]]:20: error: invalid operand (violates constant bus restrictions) +// NOGFX89: :[[@LINE-2]]:20: error: invalid operand (violates constant bus restrictions) +// NOGFX11: :[[@LINE-3]]:20: error: src_execz register not available on this GPU +// NOGFX12: :[[@LINE-4]]:20: error: src_execz register not available on this GPU +// NOGFX1250: :[[@LINE-5]]:20: error: src_execz register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:20: error: invalid operand (violates constant bus restrictions) // v_div_fmas implicitly reads VCC -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, v0, scc, v1 +// GFX12XX: v_div_fmas_f32 v0, v0, src_scc, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xfb,0x05,0x04] +// NOSICI: :[[@LINE-2]]:24: error: invalid operand (violates constant bus restrictions) +// NOGFX89: :[[@LINE-3]]:24: error: invalid operand (violates constant bus restrictions) +// GFX11: v_div_fmas_f32 v0, v0, src_scc, v1 ; encoding: [0x00,0x00,0x37,0xd6,0x00,0xfb,0x05,0x04] +// NOSICIVI: :[[@LINE-1]]:24: error: invalid operand (violates constant bus restrictions) // v_div_fmas implicitly reads VCC -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_div_fmas_f32 v0, v0, v1, vccz +// NOSICI: :[[@LINE-1]]:28: error: invalid operand (violates constant bus restrictions) +// NOGFX89: :[[@LINE-2]]:28: error: invalid operand (violates constant bus restrictions) +// NOGFX11: :[[@LINE-3]]:28: error: src_vccz register not available on this GPU +// NOGFX12: :[[@LINE-4]]:28: error: src_vccz register not available on this GPU +// NOGFX1250: :[[@LINE-5]]:28: error: src_vccz register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:28: error: invalid operand (violates constant bus restrictions) // v_addc_co_u32 implicitly reads VCC (VOP2) -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_addc_co_u32 v0, vcc, shared_base, v0, vcc +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX9: :[[@LINE-3]]:24: error: invalid operand (violates constant bus restrictions) +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_shared_base register not available on this GPU -// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_madak_f32 v0, shared_base, v0, 0x11213141 +// NOSICI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU +// NOVI: :[[@LINE-2]]:17: error: src_shared_base register not available on this GPU +// NOGFX9: :[[@LINE-3]]:17: error: invalid operand (violates constant bus restrictions) +// NOGFX11: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-6]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:17: error: src_shared_base register not available on this GPU -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_madak_f32 v0, scc, v0, 0x11213141 +// NOSICI: :[[@LINE-1]]:17: error: invalid operand (violates constant bus restrictions) +// NOGFX89: :[[@LINE-2]]:17: error: invalid operand (violates constant bus restrictions) +// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:17: error: invalid operand (violates constant bus restrictions) -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed v_madak_f32 v0, 0xff32ff, v0, 0x11213141 +// NOSICI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed +// NOGFX89: :[[@LINE-2]]:31: error: only one unique literal operand is allowed +// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed v_madak_f32 v0, 0xff32ff, v0, 1 +// NOSICI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed +// NOGFX89: :[[@LINE-2]]:31: error: only one unique literal operand is allowed +// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:31: error: only one unique literal operand is allowed -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed v_madmk_f32 v0, 0xff32ff, 0x11213141, v0 +// NOSICI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed +// NOGFX89: :[[@LINE-2]]:27: error: only one unique literal operand is allowed +// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed v_madmk_f32 v0, 0xff32ff, -1, v0 +// NOSICI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed +// NOGFX89: :[[@LINE-2]]:27: error: only one unique literal operand is allowed +// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:27: error: only one unique literal operand is allowed -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed v_madak_f16 v0, 0xff32, v0, 0x1122 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:29: error: only one unique literal operand is allowed +// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed v_madak_f16 v0, 0xff32, v0, 0 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:29: error: only one unique literal operand is allowed +// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed v_madmk_f16 v0, 0xff32, 0x1122, v0 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:25: error: only one unique literal operand is allowed +// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: only one unique literal operand is allowed v_madmk_f16 v0, 0xff32, 1, v0 +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOGFX89: :[[@LINE-2]]:25: error: only one unique literal operand is allowed +// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_base register not available on this GPU -// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], private_base, private_limit +// NOSICI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU +// NOVI: :[[@LINE-2]]:22: error: src_private_base register not available on this GPU +// NOGFX9: :[[@LINE-3]]:36: error: invalid operand (violates constant bus restrictions) +// NOGFX11: :[[@LINE-4]]:14: error: invalid operand for instruction +// NOGFX12: :[[@LINE-5]]:14: error: invalid operand for instruction +// NOGFX1250: :[[@LINE-6]]:14: error: invalid operand for instruction +// NOSICIVI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: src_private_base register not available on this GPU -// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], private_base, s0 +// NOSICI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU +// NOVI: :[[@LINE-2]]:22: error: src_private_base register not available on this GPU +// NOGFX9: :[[@LINE-3]]:36: error: invalid operand (violates constant bus restrictions) +// NOGFX11: :[[@LINE-4]]:14: error: invalid operand for instruction +// NOGFX12: :[[@LINE-5]]:14: error: invalid operand for instruction +// NOGFX1250: :[[@LINE-6]]:14: error: invalid operand for instruction +// NOSICIVI: :[[@LINE-1]]:22: error: src_private_base register not available on this GPU -// NOGCN: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_cmp_eq_f32 s[0:1], execz, s0 +// NOSICI: :[[@LINE-1]]:29: error: invalid operand (violates constant bus restrictions) +// NOGFX89: :[[@LINE-2]]:29: error: invalid operand (violates constant bus restrictions) +// NOGFX11: :[[@LINE-3]]:22: error: src_execz register not available on this GPU +// NOGFX12: :[[@LINE-4]]:22: error: src_execz register not available on this GPU +// NOGFX1250: :[[@LINE-5]]:22: error: src_execz register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:29: error: invalid operand (violates constant bus restrictions) -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_pk_add_f16 v255, private_base, private_limit +// GFX12XX: v_pk_add_f16 v255, src_private_base, src_private_limit ; encoding: [0xff,0x40,0x0f,0xcc,0xed,0xdc,0x01,0x18] +// NOSICI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// GFX11: v_pk_add_f16 v255, src_private_base, src_private_limit ; encoding: [0xff,0x40,0x0f,0xcc,0xed,0xdc,0x01,0x18] +// NOVI: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX9: :[[@LINE-5]]:34: error: invalid operand (violates constant bus restrictions) +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU -// NOSICIVI: :[[@LINE+2]]:{{[0-9]+}}: error: instruction not supported on this GPU -// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions) v_pk_add_f16 v255, vccz, execz +// NOSICI: :[[@LINE-1]]:1: error: instruction not supported on this GPU +// NOVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU +// NOGFX9: :[[@LINE-3]]:26: error: invalid operand (violates constant bus restrictions) +// NOGFX11: :[[@LINE-4]]:20: error: src_vccz register not available on this GPU +// NOGFX12: :[[@LINE-5]]:20: error: src_vccz register not available on this GPU +// NOGFX1250: :[[@LINE-6]]:20: error: src_vccz register not available on this GPU +// NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU //---------------------------------------------------------------------------// -// check dummy lit() syntax for sp3 compatibility. +// check lit() syntax. //---------------------------------------------------------------------------// -// SICI: v_sqrt_f32_e32 v2, 0x7b ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00] -// GFX89: v_sqrt_f32_e32 v2, 0x7b ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00] v_sqrt_f32 v2, lit(123) +// SICI: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00] +// GFX89: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00] +// GFX12XX: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00] +// GFX11: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00] -// SICI: v_sqrt_f32_e32 v2, 0x7b ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00] -// GFX89: v_sqrt_f32_e32 v2, 0x7b ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00] v_sqrt_f32 v2, abs(lit(123)) +// SICI: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00] +// GFX89: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00] +// GFX12XX: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00] +// GFX11: v_sqrt_f32_e32 v2, lit(0x7b) ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00] -// SICI: v_sqrt_f32_e32 v2, 0x42f60000 ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42 -// GFX89: v_sqrt_f32_e32 v2, 0x42f60000 ; encoding: [0xff,0x4e,0x04,0x7e,0x00,0x00,0xf6,0x42] v_sqrt_f32 v2, lit(123.0) +// SICI: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42] +// GFX89: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x4e,0x04,0x7e,0x00,0x00,0xf6,0x42] +// GFX12XX: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42] +// GFX11: v_sqrt_f32_e32 v2, lit(0x42f60000) ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42] -// SICI: v_sqrt_f64_e32 v[2:3], 0x405ec000 ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40] -// GFX89: v_sqrt_f64_e32 v[2:3], 0x405ec000 ; encoding: [0xff,0x50,0x04,0x7e,0x00,0xc0,0x5e,0x40] v_sqrt_f64 v[2:3], lit(123.0) +// SICI: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40] +// GFX89: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x50,0x04,0x7e,0x00,0xc0,0x5e,0x40] +// GFX11: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40] +// GFX12: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40] +// GFX1250: v_sqrt_f64_e32 v[2:3], lit(0x405ec000) ; encoding: [0xfe,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40,0x00,0x00,0x00,0x00] -// SICI: v_sqrt_f64_e32 v[2:3], 0x7b ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00] -// GFX89: v_sqrt_f64_e32 v[2:3], 0x7b ; encoding: [0xff,0x50,0x04,0x7e,0x7b,0x00,0x00,0x00] v_sqrt_f64 v[2:3], lit(123) +// SICI: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00] +// GFX89: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x50,0x04,0x7e,0x7b,0x00,0x00,0x00] +// GFX11: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00] +// GFX12: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00] +// GFX1250: v_sqrt_f64_e32 v[2:3], lit(0x7b) ; encoding: [0xfe,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00] -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: expected left paren after lit -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: expected left paren after lit v_sqrt_f32 v2, lit 123.0 +// NOGCN: :[[@LINE-1]]:20: error: expected left paren after lit -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: expected closing parentheses -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: expected closing parentheses v_sqrt_f32 v2, lit(123.0 +// NOGCN: :[[@LINE-1]]:25: error: expected closing parentheses -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: expected immediate with lit modifier -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: expected immediate with lit modifier v_sqrt_f32 v2, lit(v1) +// NOGCN: :[[@LINE-1]]:20: error: expected immediate with lit modifier // Make sure lit() is accepted on operands without modifiers. -// SICI: v_madak_f32 v4, 0x7e8, v8, 0x7e8 ; encoding: [0xff,0x10,0x08,0x42,0xe8,0x07,0x00,0x00] -// GFX89: v_madak_f32 v4, 0x7e8, v8, 0x7e8 ; encoding: [0xff,0x10,0x08,0x30,0xe8,0x07,0x00,0x00] v_madak_f32 v4, lit(0x7e8), v8, lit(0x7e8) +// SICI: v_madak_f32 v4, lit(0x7e8), v8, lit(0x7e8) ; encoding: [0xff,0x10,0x08,0x42,0xe8,0x07,0x00,0x00] +// GFX89: v_madak_f32 v4, lit(0x7e8), v8, lit(0x7e8) ; encoding: [0xff,0x10,0x08,0x30,0xe8,0x07,0x00,0x00] +// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU -// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: not a valid operand. -// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand. v_madak_f32 v4, lit(lit(0x7e8)), v8, lit(0x7e8) +// NOSICI: :[[@LINE-1]]:24: error: not a valid operand. +// NOGFX89: :[[@LINE-2]]:24: error: not a valid operand. +// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU +// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU +// NOGFX1250: :[[@LINE-5]]:1: error: instruction not supported on this GPU +// NOSICIVI: :[[@LINE-1]]:24: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/misaligned-vgpr-tuples-err.s b/llvm/test/MC/AMDGPU/misaligned-vgpr-tuples-err.s index c935c37..dbaddc1 100644 --- a/llvm/test/MC/AMDGPU/misaligned-vgpr-tuples-err.s +++ b/llvm/test/MC/AMDGPU/misaligned-vgpr-tuples-err.s @@ -1,103 +1,103 @@ // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=GFX90A --implicit-check-not=error: %s v_add_f64 v[1:2], v[1:2], v[1:2] -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx2 v[1:2], v[0:1], off -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx3 v[1:3], v[0:1], off -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx4 v[1:4], v[0:1], off -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx2 a[1:2], v[0:1], off -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx3 a[1:3], v[0:1], off -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction global_load_dwordx4 a[1:4], v[0:1], off -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_load v[1:2], v2, s[0:7] dmask:0x3 unorm -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_load v[1:3], v2, s[0:7] dmask:0x7 unorm -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_load v[1:4], v2, s[0:7] dmask:0xf unorm -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a[1:2], v2, s[0:7] dmask:0x3 unorm -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a[1:3], v2, s[0:7] dmask:0x7 unorm -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_load a[1:4], v2, s[0:7] dmask:0xf unorm -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_store v[193:194], v[238:241], s[28:35] dmask:0x3 unorm -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_store v[193:195], v[238:241], s[28:35] dmask:0x7 unorm -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_store v[193:196], v[238:241], s[28:35] dmask:0xf unorm -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a[193:194], v[238:241], s[28:35] dmask:0x3 unorm -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a[193:195], v[238:241], s[28:35] dmask:0x7 unorm -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_store a[193:196], v[238:241], s[28:35] dmask:0xf unorm -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_swap v4, v[193:196], s[28:35] dmask:0x1 unorm glc // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode image_atomic_swap v[5:6], v1, s[8:15] dmask:0x3 unorm -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap v[5:6], v[192:195], s[28:35] dmask:0x3 unorm glc -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap v[4:5], v[193:196], s[28:35] dmask:0x3 unorm glc // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode image_atomic_cmpswap v[5:8], v[192:195], s[28:35] dmask:0xf unorm glc -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap v[4:7], v[193:196], s[28:35] dmask:0xf unorm glc // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode image_atomic_cmpswap a[5:6], v[192:195], s[28:35] dmask:0x3 unorm glc -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap a[4:5], v[193:196], s[28:35] dmask:0x3 unorm glc // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode image_atomic_cmpswap a[5:8], v[192:195], s[28:35] dmask:0xf unorm glc -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction image_atomic_cmpswap a[4:7], v[193:196], s[28:35] dmask:0xf unorm glc // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_mfma_f32_32x32x8f16 a[0:15], a[1:2], v[0:1], a[0:15] -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_mfma_i32_4x4x4i8 a[1:4], a0, v1, 2 -// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[17:32] // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: invalid register class: vgpr tuples must be 64 bit aligned diff --git a/llvm/test/MC/AMDGPU/vop3-literal.s b/llvm/test/MC/AMDGPU/vop3-literal.s index 56e71b9..dd6be544 100644 --- a/llvm/test/MC/AMDGPU/vop3-literal.s +++ b/llvm/test/MC/AMDGPU/vop3-literal.s @@ -3,6 +3,7 @@ // RUN: not llvm-mc -triple=amdgcn %s -show-encoding -mcpu=gfx900 | FileCheck %s -check-prefix=GFX9 // RUN: not llvm-mc -triple=amdgcn %s -show-encoding -mcpu=gfx1010 -mattr=+wavefrontsize64 | FileCheck %s -check-prefix=GFX10 // RUN: not llvm-mc -triple=amdgcn %s -show-encoding -mcpu=gfx1250 -mattr=+wavefrontsize64 | FileCheck %s -check-prefix=GFX1250 +// RUN: not llvm-mc -triple=amdgcn %s -show-encoding -mcpu=gfx1250 -mattr=+wavefrontsize64 | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -disassemble -show-encoding | FileCheck --check-prefixes=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn %s -filetype=null -no-warn 2>&1 -mcpu=gfx900 | FileCheck %s -implicit-check-not=error: -check-prefix=GFX9-ERR // RUN: not llvm-mc -triple=amdgcn %s -filetype=null -no-warn 2>&1 -mcpu=gfx1010 -mattr=+wavefrontsize64 | FileCheck %s -implicit-check-not=error: -check-prefix=GFX10-ERR @@ -185,7 +186,7 @@ v_add_f16_e64 v0, 0xfe0b, neg(0xfe0b) v_add_f64 v[0:1], 1.23456, v[0:1] // GFX10: v_add_f64 v[0:1], 0x3ff3c0c1, v[0:1] ; encoding: [0x00,0x00,0x64,0xd5,0xff,0x00,0x02,0x00,0xc1,0xc0,0xf3,0x3f] -// GFX1250: v_add_f64_e32 v[0:1], lit64(0x3ff3c0c1fc8f3238), v[0:1] ; encoding: [0xfe,0x00,0x00,0x04,0x38,0x32,0x8f,0xfc,0xc1,0xc0,0xf3,0x3f] +// GFX1250: v_add_f64_e32 v[0:1], 0x3ff3c0c1fc8f3238, v[0:1] ; encoding: [0xfe,0x00,0x00,0x04,0x38,0x32,0x8f,0xfc,0xc1,0xc0,0xf3,0x3f] // GFX9-ERR: :[[@LINE-3]]:19: error: literal operands are not supported v_add_f64 v[0:1], v[0:1], -abs(1.23456) diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_salu_lit64.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_salu_lit64.txt index 7064479..d44400e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_salu_lit64.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_salu_lit64.txt @@ -2,55 +2,55 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s 0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_mov_b64 s[2:3], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_mov_b64 s[2:3], 0x10abcdef12345678 ; encoding: [0xfe,0x01,0x82,0xbe,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_add_nc_u64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_add_nc_u64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0xa9,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_and_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_and_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x8b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_and_not1_b64 s[2:3], lit64(0x10abcdef12345678), lit64(0x10abcdef12345678) ; encoding: [0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_and_not1_b64 s[2:3], 0x10abcdef12345678, 0x10abcdef12345678 ; encoding: [0xfe,0xfe,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_and_not1_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_and_not1_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x91,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_ashr_i64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_ashr_i64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x86,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80 -# GFX1250: s_bfe_i64 s[2:3], lit64(0x80abcdef12345678), 5 ; encoding: [0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80] +# GFX1250: s_bfe_i64 s[2:3], 0x80abcdef12345678, 5 ; encoding: [0xfe,0x85,0x82,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x80] 0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_bfe_u64 s[2:3], lit64(0x10abcdef12345678), 5 ; encoding: [0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_bfe_u64 s[2:3], 0x10abcdef12345678, 5 ; encoding: [0xfe,0x85,0x02,0x94,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_cselect_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_cselect_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x98,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_lshl_b64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_lshl_b64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x84,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_lshr_b64 s[2:3], lit64(0x10abcdef12345678), s4 ; encoding: [0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_lshr_b64 s[2:3], 0x10abcdef12345678, s4 ; encoding: [0xfe,0x04,0x82,0x85,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_mul_u64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_mul_u64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0xaa,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_nand_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_nand_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8e,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_nor_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_nor_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_or_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_or_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x8c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_or_not1_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_or_not1_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x92,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_xnor_b64 s[2:3], s[4:5], lit64(0x10abcdef12345678) ; encoding: [0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_xnor_b64 s[2:3], s[4:5], 0x10abcdef12345678 ; encoding: [0x04,0xfe,0x82,0x90,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: s_xor_b64 s[2:3], lit64(0x10abcdef12345678), s[4:5] ; encoding: [0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: s_xor_b64 s[2:3], 0x10abcdef12345678, s[4:5] ; encoding: [0xfe,0x04,0x82,0x8d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt index 227e1c4..34a4646 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sop1.txt @@ -2,7 +2,7 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s 0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00 -# GFX1250: s_add_pc_i64 lit64(0x12345678abcd0) ; encoding: [0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00] +# GFX1250: s_add_pc_i64 0x12345678abcd0 ; encoding: [0xfe,0x4b,0x80,0xbe,0xd0,0xbc,0x8a,0x67,0x45,0x23,0x01,0x00] 0xff,0x4b,0x80,0xbe,0x64,0x00,0x00,0x00 # GFX1250: s_add_pc_i64 0x64 ; encoding: [0xff,0x4b,0x80,0xbe,0x64,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_valu_lit64.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_valu_lit64.txt index 1571fb9..cce6a74 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_valu_lit64.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_valu_lit64.txt @@ -2,211 +2,211 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s 0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_add_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_add_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x05,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_ceil_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x30,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_class_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v255 ; encoding: [0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_class_f64_e32 vcc_lo, 0x10abcdef12345678, v255 ; encoding: [0xfe,0xfe,0xff,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_eq_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_eq_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_ge_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_ge_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_gt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_gt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_gt_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_gt_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_gt_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_gt_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_le_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_le_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_le_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_le_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_le_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_le_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_lg_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_lg_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_lt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_lt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_lt_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_lt_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_lt_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_lt_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_ne_i64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_ne_i64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_ne_u64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_ne_u64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_neq_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_neq_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_nge_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_nge_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_ngt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_ngt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_nle_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_nle_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_nlg_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_nlg_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_nlt_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_nlt_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_o_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_o_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmp_u_f64_e32 vcc_lo, lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmp_u_f64_e32 vcc_lo, 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7c,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_class_f64_e32 lit64(0x10abcdef12345678), v255 ; encoding: [0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_class_f64_e32 0x10abcdef12345678, v255 ; encoding: [0xfe,0xfe,0xff,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_eq_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_eq_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x45,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_eq_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_eq_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_eq_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_eq_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb5,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_ge_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_ge_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_ge_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_ge_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xad,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_ge_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_ge_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbd,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_gt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_gt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x49,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_gt_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_gt_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_gt_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_gt_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb9,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_le_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_le_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x47,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_le_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_le_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_le_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_le_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb7,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_lg_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_lg_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_lt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_lt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x43,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_lt_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_lt_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xa3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_lt_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_lt_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xb3,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_ne_i64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_ne_i64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xab,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_ne_u64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_ne_u64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xbb,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_neq_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_neq_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5b,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_nge_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_nge_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x53,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_ngt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_ngt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x57,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_nle_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_nle_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x59,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_nlg_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_nlg_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x55,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_nlt_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_nlt_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x5d,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_o_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_o_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x4f,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cmpx_u_f64_e32 lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cmpx_u_f64_e32 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0x51,0x7d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cvt_f32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cvt_f32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x1e,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cvt_i32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cvt_i32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x06,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_cvt_u32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_cvt_u32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x2a,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_floor_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_floor_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x34,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_fract_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_fract_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x7c,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_frexp_exp_i32_f64_e32 v255, lit64(0x10abcdef12345678) ; encoding: [0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_frexp_exp_i32_f64_e32 v255, 0x10abcdef12345678 ; encoding: [0xfe,0x78,0xfe,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_frexp_mant_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_frexp_mant_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x7a,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_max_num_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_max_num_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_min_num_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_min_num_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x1b,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_mul_f64_e32 v[254:255], lit64(0x10abcdef12345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_mul_f64_e32 v[254:255], 0x10abcdef12345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x0d,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_rcp_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_rcp_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x5e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_rndne_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_rndne_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x32,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_rsq_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_rsq_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x62,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_sqrt_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_sqrt_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x68,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10 -# GFX1250: v_trunc_f64_e32 v[254:255], lit64(0x10abcdef12345678) ; encoding: [0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] +# GFX1250: v_trunc_f64_e32 v[254:255], 0x10abcdef12345678 ; encoding: [0xfe,0x2e,0xfc,0x7f,0x78,0x56,0x34,0x12,0xef,0xcd,0xab,0x10] 0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40 -# GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x4063233333333333) ; encoding: [0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40] +# GFX1250: v_ceil_f64_e32 v[254:255], 0x4063233333333333 ; encoding: [0xfe,0x30,0xfc,0x7f,0x33,0x33,0x33,0x33,0x33,0x23,0x63,0x40] 0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44 -# GFX1250: v_ceil_f64_e32 v[254:255], lit64(0x448969368974c05b) ; encoding: [0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44] +# GFX1250: v_ceil_f64_e32 v[254:255], 0x448969368974c05b ; encoding: [0xfe,0x30,0xfc,0x7f,0x5b,0xc0,0x74,0x89,0x36,0x69,0x89,0x44] 0xff,0x30,0xfc,0x7f,0x00,0x20,0x63,0x40 # GFX1250: v_ceil_f64_e32 v[254:255], 0x40632000 ; encoding: [0xff,0x30,0xfc,0x7f,0x00,0x20,0x63,0x40] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt index 94edf22..acf7ded 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt @@ -3,7 +3,7 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s 0xff,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf -# GFX1250: v_mov_b64_e32 v[254:255], lit64(0xaf123456) ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: v_mov_b64_e32 v[254:255], 0xaf123456 ; encoding: [0xfe,0x3a,0xfc,0x7f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x3a,0x08,0x7e # GFX1250: v_mov_b64_e32 v[4:5], -1 ; encoding: [0xc1,0x3a,0x08,0x7e] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt index fb3f1b2..b117d7b0 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt @@ -25,7 +25,7 @@ 0xe9,0x3e,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX1250-REAL16: v_tanh_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x3e,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00 # GFX1250-REAL16: v_tanh_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00] @@ -41,7 +41,7 @@ 0xe9,0x94,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX1250-REAL16: v_tanh_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x94,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xe9,0x96,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX1250: v_prng_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x96,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -66,7 +66,7 @@ 0xe9,0xf2,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX1250-REAL16: v_rcp_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf2,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xe9,0xf4,0xfe,0x7e,0x7f,0x00,0x00,0x00 # GFX1250-REAL16: v_sqrt_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf4,0xfe,0x7e,0x7f,0x00,0x00,0x00] @@ -82,7 +82,7 @@ 0xe9,0xf4,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX1250-REAL16: v_sqrt_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf4,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xe9,0xf6,0xfe,0x7e,0x7f,0x00,0x00,0x00 # GFX1250-REAL16: v_rsq_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf6,0xfe,0x7e,0x7f,0x00,0x00,0x00] @@ -98,7 +98,7 @@ 0xe9,0xf6,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX1250-REAL16: v_rsq_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf6,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xe9,0xf8,0xfe,0x7e,0x7f,0x00,0x00,0x00 # GFX1250-REAL16: v_log_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xf8,0xfe,0x7e,0x7f,0x00,0x00,0x00] @@ -114,7 +114,7 @@ 0xe9,0xf8,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX1250-REAL16: v_log_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf8,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xe9,0xfa,0xfe,0x7e,0x7f,0x00,0x00,0x00 # GFX1250-REAL16: v_exp_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfa,0xfe,0x7e,0x7f,0x00,0x00,0x00] @@ -130,7 +130,7 @@ 0xe9,0xfa,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX1250-REAL16: v_exp_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfa,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xe9,0xfc,0xfe,0x7e,0x7f,0x00,0x00,0x00 # GFX1250-REAL16: v_sin_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfc,0xfe,0x7e,0x7f,0x00,0x00,0x00] @@ -146,7 +146,7 @@ 0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX1250-REAL16: v_sin_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfc,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00 # GFX1250-REAL16: v_cos_bf16_dpp v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00] @@ -162,7 +162,7 @@ 0xe9,0xfe,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX1250-REAL16: v_cos_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00 # GFX1250-REAL16: v_cvt_f32_bf16_dpp v127, v127.l dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xe4,0xfe,0x7e,0x7f,0x00,0x00,0x00] @@ -186,7 +186,7 @@ 0xe9,0xf0,0x02,0x7f,0x02,0x77,0x39,0x05 # GFX1250-REAL16: v_cvt_f16_bf8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf0,0x02,0x7f,0x02,0x77,0x39,0x05] -# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188] ; encoding: [0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x02,0x77,0x39,0x05] 0xea,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05 # GFX1250-REAL16: v_cvt_f16_bf8_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xf0,0x02,0x7e,0x02,0x77,0x39,0x05] @@ -202,7 +202,7 @@ 0xe9,0xee,0x02,0x7f,0x02,0x77,0x39,0x05 # GFX1250-REAL16: v_cvt_f16_fp8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xee,0x02,0x7f,0x02,0x77,0x39,0x05] -# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188] ; encoding: [0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x02,0x77,0x39,0x05] 0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05 # GFX1250-REAL16: v_cvt_pk_f16_bf8_dpp v1, v2.l dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xec,0x02,0x7e,0x02,0x77,0x39,0x05] @@ -230,7 +230,7 @@ 0xe9,0xe6,0x02,0x7f,0x02,0x77,0x39,0x05 # GFX1250-REAL16: v_sat_pk4_i4_i8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe6,0x02,0x7f,0x02,0x77,0x39,0x05] -# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188] ; encoding: [0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x02,0x77,0x39,0x05] 0xe9,0xe8,0x02,0x7e,0x02,0x77,0x39,0x05 # GFX1250-REAL16: v_sat_pk4_u4_u8_dpp v1.l, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe8,0x02,0x7e,0x02,0x77,0x39,0x05] @@ -242,4 +242,4 @@ 0xe9,0xe8,0x02,0x7f,0x02,0x77,0x39,0x05 # GFX1250-REAL16: v_sat_pk4_u4_u8_dpp v1.h, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xe8,0x02,0x7f,0x02,0x77,0x39,0x05] -# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188] ; encoding: [0x02,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[2:3], v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x02,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt index 130941c..58ac4e9 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop2.txt @@ -146,7 +146,7 @@ # GFX1250: v_add_nc_u64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x50,0x73,0x72,0x71,0x3f] 0xff,0x08,0x08,0x50,0x56,0x34,0x12,0xaf -# GFX1250: v_add_nc_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: v_add_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x50,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x7e,0x08,0x08,0x50 # GFX1250: v_add_nc_u64_e32 v[4:5], exec, v[4:5] ; encoding: [0x7e,0x08,0x08,0x50] @@ -233,7 +233,7 @@ # GFX1250: v_sub_nc_u64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x52,0x73,0x72,0x71,0x3f] 0xff,0x08,0x08,0x52,0x56,0x34,0x12,0xaf -# GFX1250: v_sub_nc_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: v_sub_nc_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x52,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x7e,0x08,0x08,0x52 # GFX1250: v_sub_nc_u64_e32 v[4:5], exec, v[4:5] ; encoding: [0x7e,0x08,0x08,0x52] @@ -320,7 +320,7 @@ # GFX1250: v_mul_u64_e32 v[4:5], 0x3f717273, v[4:5] ; encoding: [0xff,0x08,0x08,0x54,0x73,0x72,0x71,0x3f] 0xff,0x08,0x08,0x54,0x56,0x34,0x12,0xaf -# GFX1250: v_mul_u64_e32 v[4:5], lit64(0xaf123456), v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: v_mul_u64_e32 v[4:5], 0xaf123456, v[4:5] ; encoding: [0xfe,0x08,0x08,0x54,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x7e,0x08,0x08,0x54 # GFX1250: v_mul_u64_e32 v[4:5], exec, v[4:5] ; encoding: [0x7e,0x08,0x08,0x54] @@ -377,13 +377,13 @@ # GFX1250: v_fmaak_f64 v[254:255], 0x405ec000, v[2:3], 0x405ec000 ; encoding: [0xfe,0x04,0xfc,0x49,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] 0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40 -# GFX1250: v_fmaak_f64 v[254:255], lit64(0x405ec00012345678), v[254:255], lit64(0x405ec00012345678) ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +# GFX1250: v_fmaak_f64 v[254:255], 0x405ec00012345678, v[254:255], 0x405ec00012345678 ; encoding: [0xfe,0xfc,0xfd,0x49,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] 0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40 -# GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[6:7], lit64(0x405ec66666666666) ; encoding: [0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +# GFX1250: v_fmaak_f64 v[4:5], 0x405ec66666666666, v[6:7], 0x405ec66666666666 ; encoding: [0xfe,0x0c,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] 0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40 -# GFX1250: v_fmaak_f64 v[4:5], lit64(0x405ec66666666666), v[8:9], lit64(0x405ec66666666666) ; encoding: [0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +# GFX1250: v_fmaak_f64 v[4:5], 0x405ec66666666666, v[8:9], 0x405ec66666666666 ; encoding: [0xfe,0x10,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] 0xf2,0x10,0x08,0x48,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f # GFX1250: v_fmaak_f64 v[4:5], 1.0, v[8:9], 0x3ff00000 ; encoding: [0xf2,0x10,0x08,0x48,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f] @@ -395,7 +395,7 @@ # GFX1250: v_fmaak_f64 v[4:5], lit64(0x7e8), v[8:9], lit64(0x7e8) ; encoding: [0xfe,0x10,0x08,0x48,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00] 0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40 -# GFX1250: v_fmaak_f64 v[4:5], v[2:3], v[2:3], lit64(0x405ec66666666666) ; encoding: [0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +# GFX1250: v_fmaak_f64 v[4:5], v[2:3], v[2:3], 0x405ec66666666666 ; encoding: [0x02,0x05,0x08,0x48,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] 0xc1,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 # GFX1250: v_fmaak_f64 v[6:7], -1, v[8:9], 0x405ec000 ; encoding: [0xc1,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] @@ -410,7 +410,7 @@ # GFX1250: v_fmaak_f64 v[6:7], null, v[8:9], 0x405ec000 ; encoding: [0x7c,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] 0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40 -# GFX1250: v_fmaak_f64 v[6:7], s[2:3], v[8:9], lit64(0x405ec00012345678) ; encoding: [0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +# GFX1250: v_fmaak_f64 v[6:7], s[2:3], v[8:9], 0x405ec00012345678 ; encoding: [0x02,0x10,0x0c,0x48,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] 0xfd,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 # GFX1250: v_fmaak_f64 v[6:7], src_scc, v[8:9], 0x405ec000 ; encoding: [0xfd,0x10,0x0c,0x48,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] @@ -431,13 +431,13 @@ # GFX1250: v_fmamk_f64 v[254:255], 0x405ec000, 0x405ec000, v[2:3] ; encoding: [0xfe,0x04,0xfc,0x47,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] 0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40 -# GFX1250: v_fmamk_f64 v[254:255], lit64(0x405ec00012345678), lit64(0x405ec00012345678), v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +# GFX1250: v_fmamk_f64 v[254:255], 0x405ec00012345678, 0x405ec00012345678, v[254:255] ; encoding: [0xfe,0xfc,0xfd,0x47,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] 0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40 -# GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +# GFX1250: v_fmamk_f64 v[4:5], 0x405ec66666666666, 0x405ec66666666666, v[6:7] ; encoding: [0xfe,0x0c,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] 0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40 -# GFX1250: v_fmamk_f64 v[4:5], lit64(0x405ec66666666666), lit64(0x405ec66666666666), v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +# GFX1250: v_fmamk_f64 v[4:5], 0x405ec66666666666, 0x405ec66666666666, v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] 0xf2,0x0c,0x08,0x46,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f # GFX1250: v_fmamk_f64 v[4:5], 1.0, 0x3ff00000, v[6:7] ; encoding: [0xf2,0x0c,0x08,0x46,0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f] @@ -449,7 +449,7 @@ # GFX1250: v_fmamk_f64 v[4:5], lit64(0x7e8), lit64(0x7e8), v[8:9] ; encoding: [0xfe,0x10,0x08,0x46,0xe8,0x07,0x00,0x00,0x00,0x00,0x00,0x00] 0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40 -# GFX1250: v_fmamk_f64 v[4:5], v[2:3], lit64(0x405ec66666666666), v[6:7] ; encoding: [0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] +# GFX1250: v_fmamk_f64 v[4:5], v[2:3], 0x405ec66666666666, v[6:7] ; encoding: [0x02,0x0d,0x08,0x46,0x66,0x66,0x66,0x66,0x66,0xc6,0x5e,0x40] 0xc1,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 # GFX1250: v_fmamk_f64 v[6:7], -1, 0x405ec000, v[2:3] ; encoding: [0xc1,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] @@ -464,7 +464,7 @@ # GFX1250: v_fmamk_f64 v[6:7], null, 0x405ec000, v[2:3] ; encoding: [0x7c,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] 0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40 -# GFX1250: v_fmamk_f64 v[6:7], s[2:3], lit64(0x405ec00012345678), v[2:3] ; encoding: [0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] +# GFX1250: v_fmamk_f64 v[6:7], s[2:3], 0x405ec00012345678, v[2:3] ; encoding: [0x02,0x04,0x0c,0x46,0x78,0x56,0x34,0x12,0x00,0xc0,0x5e,0x40] 0xfd,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40 # GFX1250: v_fmamk_f64 v[6:7], src_scc, 0x405ec000, v[2:3] ; encoding: [0xfd,0x04,0x0c,0x46,0x00,0x00,0x00,0x00,0x00,0xc0,0x5e,0x40] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt index c88fbc2..06ef877 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt @@ -868,7 +868,7 @@ 0xff,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_and_not0_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_and_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x2d,0x80,0xbe # GFX12: s_and_not0_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x2d,0x80,0xbe] @@ -959,7 +959,7 @@ 0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_and_not0_wrexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_and_not0_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x35,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x35,0x80,0xbe # GFX12: s_and_not0_wrexec_b64 s[0:1], -1 ; encoding: [0xc1,0x35,0x80,0xbe] @@ -1050,7 +1050,7 @@ 0xff,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_and_not1_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_and_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x31,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x31,0x80,0xbe # GFX12: s_and_not1_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x31,0x80,0xbe] @@ -1141,7 +1141,7 @@ 0xff,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_and_not1_wrexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_and_not1_wrexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x37,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x37,0x80,0xbe # GFX12: s_and_not1_wrexec_b64 s[0:1], -1 ; encoding: [0xc1,0x37,0x80,0xbe] @@ -1232,7 +1232,7 @@ 0xff,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_and_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_and_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_and_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x21,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x21,0x80,0xbe # GFX12: s_and_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x21,0x80,0xbe] @@ -1341,7 +1341,7 @@ 0xff,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_bcnt0_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_bcnt0_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_bcnt0_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x17,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x17,0x80,0xbe # GFX12: s_bcnt0_i32_b64 s0, -1 ; encoding: [0xc1,0x17,0x80,0xbe] @@ -1453,7 +1453,7 @@ 0xff,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_bcnt1_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_bcnt1_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_bcnt1_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x19,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x19,0x80,0xbe # GFX12: s_bcnt1_i32_b64 s0, -1 ; encoding: [0xc1,0x19,0x80,0xbe] @@ -1832,7 +1832,7 @@ 0xff,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_brev_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_brev_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_brev_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x05,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x05,0x80,0xbe # GFX12: s_brev_b64 s[0:1], -1 ; encoding: [0xc1,0x05,0x80,0xbe] @@ -1887,7 +1887,7 @@ 0xff,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_cls_i32_i64 s0, 0xaf123456 ; encoding: [0xff,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_cls_i32_i64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_cls_i32_i64 s0, 0xaf123456 ; encoding: [0xfe,0x0d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x0d,0x80,0xbe # GFX12: s_cls_i32_i64 s0, -1 ; encoding: [0xc1,0x0d,0x80,0xbe] @@ -2053,7 +2053,7 @@ 0xff,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_clz_i32_u64 s0, 0xaf123456 ; encoding: [0xff,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_clz_i32_u64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_clz_i32_u64 s0, 0xaf123456 ; encoding: [0xfe,0x0b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x0b,0x80,0xbe # GFX12: s_clz_i32_u64 s0, -1 ; encoding: [0xc1,0x0b,0x80,0xbe] @@ -2159,7 +2159,7 @@ 0xff,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_cmov_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_cmov_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_cmov_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x03,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x03,0x80,0xbe # GFX12: s_cmov_b64 s[0:1], -1 ; encoding: [0xc1,0x03,0x80,0xbe] @@ -2268,7 +2268,7 @@ 0xff,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_ctz_i32_b64 s0, 0xaf123456 ; encoding: [0xff,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_ctz_i32_b64 s0, lit64(0xaf123456) ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_ctz_i32_b64 s0, 0xaf123456 ; encoding: [0xfe,0x09,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x09,0x80,0xbe # GFX12: s_ctz_i32_b64 s0, -1 ; encoding: [0xc1,0x09,0x80,0xbe] @@ -2396,7 +2396,7 @@ 0xff,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_mov_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_mov_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_mov_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x01,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x01,0x80,0xbe # GFX12: s_mov_b64 s[0:1], -1 ; encoding: [0xc1,0x01,0x80,0xbe] @@ -2493,7 +2493,7 @@ 0xff,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_movreld_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_movreld_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_movreld_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x43,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x43,0x80,0xbe # GFX12: s_movreld_b64 s[0:1], -1 ; encoding: [0xc1,0x43,0x80,0xbe] @@ -2662,7 +2662,7 @@ 0xff,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_nand_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_nand_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_nand_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x27,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x27,0x80,0xbe # GFX12: s_nand_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x27,0x80,0xbe] @@ -2753,7 +2753,7 @@ 0xff,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_nor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_nor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_nor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x29,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x29,0x80,0xbe # GFX12: s_nor_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x29,0x80,0xbe] @@ -2856,7 +2856,7 @@ 0xff,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_not_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_not_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_not_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x1f,0x80,0xbe # GFX12: s_not_b64 s[0:1], -1 ; encoding: [0xc1,0x1f,0x80,0xbe] @@ -2947,7 +2947,7 @@ 0xff,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_or_not0_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_or_not0_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2f,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x2f,0x80,0xbe # GFX12: s_or_not0_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x2f,0x80,0xbe] @@ -3038,7 +3038,7 @@ 0xff,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_or_not1_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_or_not1_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x33,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x33,0x80,0xbe # GFX12: s_or_not1_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x33,0x80,0xbe] @@ -3129,7 +3129,7 @@ 0xff,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_or_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_or_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_or_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x23,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x23,0x80,0xbe # GFX12: s_or_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x23,0x80,0xbe] @@ -3232,7 +3232,7 @@ 0xff,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_quadmask_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_quadmask_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_quadmask_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x1b,0x80,0xbe # GFX12: s_quadmask_b64 s[0:1], -1 ; encoding: [0xc1,0x1b,0x80,0xbe] @@ -3549,7 +3549,7 @@ 0xff,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_wqm_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_wqm_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_wqm_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x1d,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x1d,0x80,0xbe # GFX12: s_wqm_b64 s[0:1], -1 ; encoding: [0xc1,0x1d,0x80,0xbe] @@ -3640,7 +3640,7 @@ 0xff,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_xnor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_xnor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_xnor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x2b,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x2b,0x80,0xbe # GFX12: s_xnor_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x2b,0x80,0xbe] @@ -3731,7 +3731,7 @@ 0xff,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf # GFX1200: s_xor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xff,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf] -# GFX1250: s_xor_saveexec_b64 s[0:1], lit64(0xaf123456) ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_xor_saveexec_b64 s[0:1], 0xaf123456 ; encoding: [0xfe,0x25,0x80,0xbe,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x25,0x80,0xbe # GFX12: s_xor_saveexec_b64 s[0:1], -1 ; encoding: [0xc1,0x25,0x80,0xbe] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt index d889931..47b7408 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop2.txt @@ -56,7 +56,7 @@ 0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf # GFX1200: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf] -# GFX1250: s_add_nc_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_add_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x02,0x7e,0x80,0xa9 # GFX12: s_add_nc_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0xa9] @@ -81,7 +81,7 @@ 0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf # GFX1200: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0xa9,0x56,0x34,0x12,0xaf] -# GFX1250: s_add_nc_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_add_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0xa9,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x02,0x04,0x00,0xaa # GFX12: s_sub_nc_u64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x00,0xaa] @@ -136,7 +136,7 @@ 0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf # GFX1200: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf] -# GFX1250: s_sub_nc_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_sub_nc_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x02,0x7e,0x00,0xaa # GFX12: s_sub_nc_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x00,0xaa] @@ -161,7 +161,7 @@ 0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf # GFX1200: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x00,0xaa,0x56,0x34,0x12,0xaf] -# GFX1250: s_sub_nc_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_sub_nc_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x00,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x02,0x04,0x80,0xaa # GFX12: s_mul_u64 s[0:1], s[2:3], s[4:5] ; encoding: [0x02,0x04,0x80,0xaa] @@ -216,7 +216,7 @@ 0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf # GFX1200: s_mul_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xff,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf] -# GFX1250: s_mul_u64 s[0:1], lit64(0xaf123456), s[2:3] ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_mul_u64 s[0:1], 0xaf123456, s[2:3] ; encoding: [0xfe,0x02,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x02,0x7e,0x80,0xaa # GFX12: s_mul_u64 s[0:1], s[2:3], exec ; encoding: [0x02,0x7e,0x80,0xaa] @@ -241,7 +241,7 @@ 0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf # GFX1200: s_mul_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0xaa,0x56,0x34,0x12,0xaf] -# GFX1250: s_mul_u64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_mul_u64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0xaa,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x01,0x02,0x05,0xa0 # GFX12: s_add_f32 s5, s1, s2 ; encoding: [0x01,0x02,0x05,0xa0] @@ -1697,7 +1697,7 @@ 0xff,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf # GFX1200: s_and_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf] -# GFX1250: s_and_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_and_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x04,0x80,0x8b # GFX12: s_and_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8b] @@ -1725,7 +1725,7 @@ 0x02,0xff,0x80,0x8b,0x56,0x34,0x12,0xaf # GFX1200: s_and_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8b,0x56,0x34,0x12,0xaf] -# GFX1250: s_and_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_and_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8b,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x02,0xc1,0x80,0x8b # GFX12: s_and_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8b] @@ -1882,7 +1882,7 @@ 0xff,0x04,0x80,0x91,0x56,0x34,0x12,0xaf # GFX1200: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x91,0x56,0x34,0x12,0xaf] -# GFX1250: s_and_not1_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_and_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x04,0x80,0x91 # GFX12: s_and_not1_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x91] @@ -1910,7 +1910,7 @@ 0x02,0xff,0x80,0x91,0x56,0x34,0x12,0xaf # GFX1200: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x91,0x56,0x34,0x12,0xaf] -# GFX1250: s_and_not1_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_and_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x91,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x02,0xc1,0x80,0x91 # GFX12: s_and_not1_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x91] @@ -2067,7 +2067,7 @@ 0xff,0x04,0x80,0x86,0x56,0x34,0x12,0xaf # GFX1200: s_ashr_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x86,0x56,0x34,0x12,0xaf] -# GFX1250: s_ashr_i64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_ashr_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x86,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x04,0x80,0x86 # GFX12: s_ashr_i64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x80,0x86] @@ -2251,7 +2251,7 @@ 0xff,0x04,0x80,0x94,0x56,0x34,0x12,0xaf # GFX1200: s_bfe_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x94,0x56,0x34,0x12,0xaf] -# GFX1250: s_bfe_i64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_bfe_i64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x04,0x80,0x94 # GFX12: s_bfe_i64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x80,0x94] @@ -2435,7 +2435,7 @@ 0xff,0x04,0x00,0x94,0x56,0x34,0x12,0xaf # GFX1200: s_bfe_u64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x00,0x94,0x56,0x34,0x12,0xaf] -# GFX1250: s_bfe_u64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_bfe_u64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x00,0x94,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x04,0x00,0x94 # GFX12: s_bfe_u64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x00,0x94] @@ -2820,7 +2820,7 @@ 0xff,0x04,0x80,0x98,0x56,0x34,0x12,0xaf # GFX1200: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x98,0x56,0x34,0x12,0xaf] -# GFX1250: s_cselect_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_cselect_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x04,0x80,0x98 # GFX12: s_cselect_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x98] @@ -2848,7 +2848,7 @@ 0x02,0xff,0x80,0x98,0x56,0x34,0x12,0xaf # GFX1200: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x98,0x56,0x34,0x12,0xaf] -# GFX1250: s_cselect_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_cselect_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x98,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x02,0xc1,0x80,0x98 # GFX12: s_cselect_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x98] @@ -3425,7 +3425,7 @@ 0xff,0x04,0x80,0x84,0x56,0x34,0x12,0xaf # GFX1200: s_lshl_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x84,0x56,0x34,0x12,0xaf] -# GFX1250: s_lshl_b64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_lshl_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x84,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x04,0x80,0x84 # GFX12: s_lshl_b64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x80,0x84] @@ -3609,7 +3609,7 @@ 0xff,0x04,0x80,0x85,0x56,0x34,0x12,0xaf # GFX1200: s_lshr_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xff,0x04,0x80,0x85,0x56,0x34,0x12,0xaf] -# GFX1250: s_lshr_b64 s[0:1], lit64(0xaf123456), s4 ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_lshr_b64 s[0:1], 0xaf123456, s4 ; encoding: [0xfe,0x04,0x80,0x85,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x04,0x80,0x85 # GFX12: s_lshr_b64 s[0:1], -1, s4 ; encoding: [0xc1,0x04,0x80,0x85] @@ -4528,7 +4528,7 @@ 0xff,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf # GFX1200: s_nand_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf] -# GFX1250: s_nand_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_nand_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x04,0x80,0x8e # GFX12: s_nand_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8e] @@ -4556,7 +4556,7 @@ 0x02,0xff,0x80,0x8e,0x56,0x34,0x12,0xaf # GFX1200: s_nand_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8e,0x56,0x34,0x12,0xaf] -# GFX1250: s_nand_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_nand_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8e,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x02,0xc1,0x80,0x8e # GFX12: s_nand_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8e] @@ -4713,7 +4713,7 @@ 0xff,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf # GFX1200: s_nor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf] -# GFX1250: s_nor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_nor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x04,0x80,0x8f # GFX12: s_nor_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8f] @@ -4741,7 +4741,7 @@ 0x02,0xff,0x80,0x8f,0x56,0x34,0x12,0xaf # GFX1200: s_nor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8f,0x56,0x34,0x12,0xaf] -# GFX1250: s_nor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_nor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8f,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x02,0xc1,0x80,0x8f # GFX12: s_nor_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8f] @@ -4898,7 +4898,7 @@ 0xff,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf # GFX1200: s_or_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf] -# GFX1250: s_or_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_or_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x04,0x80,0x8c # GFX12: s_or_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8c] @@ -4926,7 +4926,7 @@ 0x02,0xff,0x80,0x8c,0x56,0x34,0x12,0xaf # GFX1200: s_or_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8c,0x56,0x34,0x12,0xaf] -# GFX1250: s_or_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_or_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8c,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x02,0xc1,0x80,0x8c # GFX12: s_or_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8c] @@ -5083,7 +5083,7 @@ 0xff,0x04,0x80,0x92,0x56,0x34,0x12,0xaf # GFX1200: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x92,0x56,0x34,0x12,0xaf] -# GFX1250: s_or_not1_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_or_not1_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x04,0x80,0x92 # GFX12: s_or_not1_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x92] @@ -5111,7 +5111,7 @@ 0x02,0xff,0x80,0x92,0x56,0x34,0x12,0xaf # GFX1200: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x92,0x56,0x34,0x12,0xaf] -# GFX1250: s_or_not1_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_or_not1_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x92,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x02,0xc1,0x80,0x92 # GFX12: s_or_not1_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x92] @@ -5898,7 +5898,7 @@ 0xff,0x04,0x80,0x90,0x56,0x34,0x12,0xaf # GFX1200: s_xnor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x90,0x56,0x34,0x12,0xaf] -# GFX1250: s_xnor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_xnor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x04,0x80,0x90 # GFX12: s_xnor_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x90] @@ -5926,7 +5926,7 @@ 0x02,0xff,0x80,0x90,0x56,0x34,0x12,0xaf # GFX1200: s_xnor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x90,0x56,0x34,0x12,0xaf] -# GFX1250: s_xnor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_xnor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x90,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x02,0xc1,0x80,0x90 # GFX12: s_xnor_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x90] @@ -6083,7 +6083,7 @@ 0xff,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf # GFX1200: s_xor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xff,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf] -# GFX1250: s_xor_b64 s[0:1], lit64(0xaf123456), s[4:5] ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_xor_b64 s[0:1], 0xaf123456, s[4:5] ; encoding: [0xfe,0x04,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0xc1,0x04,0x80,0x8d # GFX12: s_xor_b64 s[0:1], -1, s[4:5] ; encoding: [0xc1,0x04,0x80,0x8d] @@ -6111,7 +6111,7 @@ 0x02,0xff,0x80,0x8d,0x56,0x34,0x12,0xaf # GFX1200: s_xor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xff,0x80,0x8d,0x56,0x34,0x12,0xaf] -# GFX1250: s_xor_b64 s[0:1], s[2:3], lit64(0xaf123456) ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_xor_b64 s[0:1], s[2:3], 0xaf123456 ; encoding: [0x02,0xfe,0x80,0x8d,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x02,0xc1,0x80,0x8d # GFX12: s_xor_b64 s[0:1], s[2:3], -1 ; encoding: [0x02,0xc1,0x80,0x8d] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopc.txt index a8da16f..9355582 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopc.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopc.txt @@ -1492,7 +1492,7 @@ 0x00,0xff,0x10,0xbf,0x56,0x34,0x12,0xaf # GFX1200: s_cmp_eq_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xff,0x10,0xbf,0x56,0x34,0x12,0xaf] -# GFX1250: s_cmp_eq_u64 s[0:1], lit64(0xaf123456) ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_cmp_eq_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xfe,0x10,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x00,0xc1,0x10,0xbf # GFX12: s_cmp_eq_u64 s[0:1], -1 ; encoding: [0x00,0xc1,0x10,0xbf] @@ -2015,7 +2015,7 @@ 0x00,0xff,0x11,0xbf,0x56,0x34,0x12,0xaf # GFX1200: s_cmp_lg_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xff,0x11,0xbf,0x56,0x34,0x12,0xaf] -# GFX1250: s_cmp_lg_u64 s[0:1], lit64(0xaf123456) ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] +# GFX1250: s_cmp_lg_u64 s[0:1], 0xaf123456 ; encoding: [0x00,0xfe,0x11,0xbf,0x56,0x34,0x12,0xaf,0x00,0x00,0x00,0x00] 0x00,0xc1,0x11,0xbf # GFX12: s_cmp_lg_u64 s[0:1], -1 ; encoding: [0x00,0xc1,0x11,0xbf] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt index 7a7be57..d6a176e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop1_dpp8.txt @@ -1,10 +1,10 @@ # NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-REAL16 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX1200-FAKE16 %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-REAL16 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX1200-FAKE16 %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-REAL16 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16,GFX1250-FAKE16 %s 0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05 # GFX12: v_bfrev_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x70,0x0a,0x7e,0x01,0x77,0x39,0x05] @@ -22,7 +22,8 @@ 0xe9,0xb8,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_ceil_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb8,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xb8,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_ceil_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xb8,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -55,7 +56,8 @@ 0xe9,0xc2,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_cos_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc2,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xc2,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_cos_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc2,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -94,7 +96,8 @@ 0xe9,0x14,0x0a,0x7f,0x01,0x77,0x39,0x05 # GFX12-REAL16: v_cvt_f16_f32_dpp v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x14,0x0a,0x7f,0x01,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[1:2], v[187:188] ; encoding: [0x01,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[1:2], v[187:188] ; encoding: [0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[1:2]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x01,0x77,0x39,0x05] 0xea,0x14,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_cvt_f16_f32_dpp v127.h, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0x14,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -109,7 +112,8 @@ 0xe9,0xa2,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_cvt_f16_i16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xa2,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xa2,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_cvt_f16_i16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xa2,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -124,7 +128,8 @@ 0xe9,0xa0,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_cvt_f16_u16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xa0,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xa0,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_cvt_f16_u16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xa0,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -197,7 +202,8 @@ 0xe9,0xa6,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_cvt_i16_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xa6,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xa6,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_cvt_i16_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xa6,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -240,7 +246,8 @@ 0xe9,0xc6,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_cvt_norm_i16_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc6,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xc6,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_cvt_norm_i16_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc6,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -255,7 +262,8 @@ 0xe9,0xc8,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_cvt_norm_u16_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc8,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xc8,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_cvt_norm_u16_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc8,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -276,7 +284,8 @@ 0xe9,0xa4,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_cvt_u16_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xa4,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xa4,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_cvt_u16_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xa4,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -313,7 +322,8 @@ 0xe9,0xb0,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_exp_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb0,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xb0,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_exp_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xb0,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -334,7 +344,8 @@ 0xe9,0xb6,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_floor_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb6,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xb6,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_floor_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xb6,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -355,7 +366,8 @@ 0xe9,0xbe,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_fract_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xbe,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xbe,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_fract_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xbe,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -376,7 +388,8 @@ 0xe9,0xb4,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_frexp_exp_i16_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb4,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xb4,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_frexp_exp_i16_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xb4,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -397,7 +410,8 @@ 0xe9,0xb2,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_frexp_mant_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xb2,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xb2,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_frexp_mant_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xb2,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -418,7 +432,8 @@ 0xe9,0xae,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_log_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xae,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xae,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_log_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xae,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -469,7 +484,8 @@ 0xe9,0xd2,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_not_b16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xd2,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xd2,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_not_b16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xd2,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -491,7 +507,8 @@ 0xe9,0xa8,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_rcp_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xa8,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xa8,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_rcp_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xa8,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -518,7 +535,8 @@ 0xe9,0xbc,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_rndne_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xbc,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xbc,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_rndne_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xbc,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -539,7 +557,8 @@ 0xe9,0xac,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_rsq_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xac,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xac,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_rsq_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xac,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -560,7 +579,8 @@ 0xe9,0xc4,0x0a,0x7f,0x01,0x77,0x39,0x05 # GFX12-REAL16: v_sat_pk_u8_i16_dpp v5.h, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc4,0x0a,0x7f,0x01,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[1:2], v[187:188] ; encoding: [0x01,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[1:2], v[187:188] ; encoding: [0x01,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[1:2]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x01,0x77,0x39,0x05] 0xea,0xc4,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_sat_pk_u8_i16_dpp v127.h, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc4,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -575,7 +595,8 @@ 0xe9,0xc0,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_sin_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xc0,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xc0,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_sin_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xc0,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -596,7 +617,8 @@ 0xe9,0xaa,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_sqrt_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xaa,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xaa,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_sqrt_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xaa,0xfe,0x7f,0xff,0x00,0x00,0x00] @@ -617,7 +639,8 @@ 0xe9,0xba,0x0a,0x7f,0x81,0x77,0x39,0x05 # GFX12-REAL16: v_trunc_f16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xba,0x0a,0x7f,0x81,0x77,0x39,0x05] -# GFX12-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1200-FAKE16: v_add_f64_e32 v[156:157], v[129:130], v[187:188] ; encoding: [0x81,0x77,0x39,0x05] +# GFX1250-FAKE16: v_add_f64_e32 v[156:157], v[129:130]/*Invalid register, operand has 'VS_64_Align2' register class*/, v[187:188]/*Invalid register, operand has 'VReg_64_Align2' register class*/ ; encoding: [0x81,0x77,0x39,0x05] 0xea,0xba,0xfe,0x7f,0xff,0x00,0x00,0x00 # GFX12-REAL16: v_trunc_f16_dpp v127.h, v127.h dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xba,0xfe,0x7f,0xff,0x00,0x00,0x00] diff --git a/llvm/test/Other/new-pm-print-pipeline.ll b/llvm/test/Other/new-pm-print-pipeline.ll index 6fa57f1..3536932 100644 --- a/llvm/test/Other/new-pm-print-pipeline.ll +++ b/llvm/test/Other/new-pm-print-pipeline.ll @@ -50,7 +50,7 @@ ; CHECK-17: function(print<stack-lifetime><may>,print<stack-lifetime><must>) ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18 -; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>) +; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;no-switch-to-arithmetic;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-arithmetic;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>) ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only>,loop-vectorize<interleave-forced-only;vectorize-forced-only>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-19 ; CHECK-19: function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,loop-vectorize<interleave-forced-only;vectorize-forced-only;>) diff --git a/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll b/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll index bb3001e..a7d3446 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll @@ -91,12 +91,13 @@ @ctz7.table = internal unnamed_addr constant [32 x i8] c"\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09", align 1 -define i32 @ctz1(i32 %x) { +define i32 @ctz1(i32 %x) !prof !0 { ; CHECK-LABEL: @ctz1( +; CHECK: !prof [[PROF_0:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true) ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]], !prof [[PROF_1:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] @@ -498,3 +499,7 @@ entry: %conv = zext i8 %0 to i32 ret i32 %conv } + +!0 = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF_0]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF_1]] = !{!"branch_weights", i32 1, i32 1048575} diff --git a/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-dereferencing-pointer.ll b/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-dereferencing-pointer.ll index d2ecb57..0e5c4f0 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-dereferencing-pointer.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-dereferencing-pointer.ll @@ -20,13 +20,14 @@ @table = internal unnamed_addr constant [64 x i32] [i32 0, i32 1, i32 12, i32 2, i32 13, i32 22, i32 17, i32 3, i32 14, i32 33, i32 23, i32 36, i32 18, i32 58, i32 28, i32 4, i32 62, i32 15, i32 34, i32 26, i32 24, i32 48, i32 50, i32 37, i32 19, i32 55, i32 59, i32 52, i32 29, i32 44, i32 39, i32 5, i32 63, i32 11, i32 21, i32 16, i32 32, i32 35, i32 57, i32 27, i32 61, i32 25, i32 47, i32 49, i32 54, i32 51, i32 43, i32 38, i32 10, i32 20, i32 31, i32 56, i32 60, i32 46, i32 53, i32 42, i32 9, i32 30, i32 45, i32 41, i32 8, i32 40, i32 7, i32 6], align 4 -define i32 @ctz6(ptr nocapture readonly %b) { +define i32 @ctz6(ptr nocapture readonly %b) !prof !0 { ; CHECK-LABEL: @ctz6( +; CHECK: !prof [[PROF_0:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.cttz.i64(i64 [[TMP0]], i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP0]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 0, i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 0, i64 [[TMP1]], !prof [[PROF_1:![0-9]+]] ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[TMP4]] ; @@ -40,3 +41,7 @@ entry: %1 = load i32, ptr %arrayidx, align 4 ret i32 %1 } + +!0 = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF_0]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF_1]] = !{!"branch_weights", i32 1, i32 1048575} diff --git a/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-non-argument-value.ll b/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-non-argument-value.ll index f63badb..a7732f0 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-non-argument-value.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-non-argument-value.ll @@ -20,13 +20,14 @@ @.str = private constant [3 x i8] c"%u\00", align 1 @test.table = internal constant [32 x i8] c"\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09", align 1 -define i32 @test() { +define i32 @test() !prof !0 { ; CHECK-LABEL: @test( +; CHECK: !prof [[PROF_0:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @x, align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]], !prof [[PROF_1:![0-9]+]] ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP4]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] @@ -43,3 +44,7 @@ entry: %conv = zext i8 %1 to i32 ret i32 %conv } + +!0 = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF_0]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF_1]] = !{!"branch_weights", i32 1, i32 1048575} diff --git a/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-zero-element.ll b/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-zero-element.ll index bbdd9b7c..5f9b4ce 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-zero-element.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-zero-element.ll @@ -3,12 +3,13 @@ @ctz1.table = internal constant [32 x i8] c"\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09", align 1 -define i32 @ctz1(i32 %x) { +define i32 @ctz1(i32 %x) !prof !0 { ; CHECK-LABEL: @ctz1( +; CHECK: !prof [[PROF_0:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true) ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]], !prof [[PROF_1:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP3]] to i32 ; CHECK-NEXT: ret i32 [[CONV]] @@ -24,3 +25,7 @@ entry: %conv = zext i8 %0 to i32 ret i32 %conv } + +!0 = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF_0]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF_1]] = !{!"branch_weights", i32 1, i32 1048575} diff --git a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll index 3a306a4..ccef61d 100644 --- a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll +++ b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll @@ -12,7 +12,7 @@ target triple = "sparcv9-unknown-unknown" define i8 @test_cmpxchg_i8(ptr %arg, i8 %old, i8 %new) { ; CHECK-LABEL: @test_cmpxchg_i8( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -45,7 +45,7 @@ define i8 @test_cmpxchg_i8(ptr %arg, i8 %old, i8 %new) { ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8 ; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i8, i1 } poison, i8 [[EXTRACTED]], 0 ; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i8, i1 } [[TMP17]], i1 [[TMP14]], 1 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: [[RET:%.*]] = extractvalue { i8, i1 } [[TMP18]], 0 ; CHECK-NEXT: ret i8 [[RET]] ; @@ -58,7 +58,7 @@ entry: define i16 @test_cmpxchg_i16(ptr %arg, i16 %old, i16 %new) { ; CHECK-LABEL: @test_cmpxchg_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -91,7 +91,7 @@ define i16 @test_cmpxchg_i16(ptr %arg, i16 %old, i16 %new) { ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 ; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0 ; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: [[RET:%.*]] = extractvalue { i16, i1 } [[TMP18]], 0 ; CHECK-NEXT: ret i16 [[RET]] ; @@ -104,7 +104,7 @@ entry: define i16 @test_add_i16(ptr %arg, i16 %val) { ; CHECK-LABEL: @test_add_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -130,7 +130,7 @@ define i16 @test_add_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; entry: @@ -141,7 +141,7 @@ entry: define i16 @test_xor_i16(ptr %arg, i16 %val) { ; CHECK-LABEL: @test_xor_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -164,7 +164,7 @@ define i16 @test_xor_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; entry: @@ -175,7 +175,7 @@ entry: define i16 @test_or_i16(ptr %arg, i16 %val) { ; CHECK-LABEL: @test_or_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -198,7 +198,7 @@ define i16 @test_or_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; entry: @@ -209,7 +209,7 @@ entry: define i16 @test_and_i16(ptr %arg, i16 %val) { ; CHECK-LABEL: @test_and_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -233,7 +233,7 @@ define i16 @test_and_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED]] ; entry: @@ -244,7 +244,7 @@ entry: define i16 @test_min_i16(ptr %arg, i16 %val) { ; CHECK-LABEL: @test_min_i16( ; CHECK-NEXT: entry: -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARG:%.*]], i64 -4) ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP0]], 3 @@ -272,7 +272,7 @@ define i16 @test_min_i16(ptr %arg, i16 %val) { ; CHECK: atomicrmw.end: ; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret i16 [[EXTRACTED3]] ; entry: @@ -282,7 +282,7 @@ entry: define half @test_atomicrmw_fadd_f16(ptr %ptr, half %value) { ; CHECK-LABEL: @test_atomicrmw_fadd_f16( -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence release ; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4) ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64 ; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3 @@ -312,7 +312,7 @@ define half @test_atomicrmw_fadd_f16(ptr %ptr, half %value) { ; CHECK-NEXT: [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]] ; CHECK-NEXT: [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[EXTRACTED3]] to half -; CHECK-NEXT: fence seq_cst +; CHECK-NEXT: fence acquire ; CHECK-NEXT: ret half [[TMP8]] ; %res = atomicrmw fadd ptr %ptr, half %value seq_cst diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll index 4173c32..f45798b 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll @@ -7,10 +7,10 @@ ; state, and the block that determines the next state. ; < path of BBs that form a cycle > [ state, determinator ] define i32 @test1(i32 %num) !prof !0{ -; CHECK: < case2 for.inc for.body > [ 1, for.inc ] -; CHECK-NEXT: < for.inc for.body > [ 1, for.inc ] -; CHECK-NEXT: < case1 for.inc for.body > [ 2, for.inc ] -; CHECK-NEXT: < case2 sel.si.unfold.false for.inc for.body > [ 2, sel.si.unfold.false ] +; CHECK: < case2, for.inc, for.body > [ 1, for.inc ] +; CHECK-NEXT: < for.inc, for.body > [ 1, for.inc ] +; CHECK-NEXT: < case1, for.inc, for.body > [ 2, for.inc ] +; CHECK-NEXT: < case2, sel.si.unfold.false, for.inc, for.body > [ 2, sel.si.unfold.false ] entry: br label %for.body @@ -47,12 +47,12 @@ for.end: ; complicated CFG. Here the FSM is represented as a nested loop, with ; fallthrough cases. define i32 @test2(i32 %init) { -; CHECK: < loop.1.backedge loop.1 loop.2 loop.3 > [ 1, loop.1 ] -; CHECK-NEXT: < case4 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 2, loop.1.backedge ] -; CHECK-NEXT: < case2 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 4, loop.1.backedge ] -; CHECK-NEXT: < case4 loop.2.backedge loop.2 loop.3 > [ 3, loop.2.backedge ] -; CHECK-NEXT: < case3 loop.2.backedge loop.2 loop.3 > [ 0, loop.2.backedge ] -; CHECK-NEXT: < case2 loop.3 > [ 3, loop.3 ] +; CHECK: < loop.1.backedge, loop.1, loop.2, loop.3 > [ 1, loop.1 ] +; CHECK-NEXT: < case4, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 2, loop.1.backedge ] +; CHECK-NEXT: < case2, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 4, loop.1.backedge ] +; CHECK-NEXT: < case4, loop.2.backedge, loop.2, loop.3 > [ 3, loop.2.backedge ] +; CHECK-NEXT: < case3, loop.2.backedge, loop.2, loop.3 > [ 0, loop.2.backedge ] +; CHECK-NEXT: < case2, loop.3 > [ 3, loop.3 ] entry: %cmp = icmp eq i32 %init, 0 %sel = select i1 %cmp, i32 0, i32 2 @@ -187,12 +187,12 @@ bb66: ; preds = %bb59 ; Value %init is not predictable but it's okay since it is the value initial to the switch. define i32 @initial.value.positive1(i32 %init) !prof !0 { -; CHECK: < loop.1.backedge loop.1 loop.2 loop.3 > [ 1, loop.1 ] -; CHECK-NEXT: < case4 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 2, loop.1.backedge ] -; CHECK-NEXT: < case2 loop.1.backedge state.1.be2.si.unfold.false loop.1 loop.2 loop.3 > [ 4, loop.1.backedge ] -; CHECK-NEXT: < case4 loop.2.backedge loop.2 loop.3 > [ 3, loop.2.backedge ] -; CHECK-NEXT: < case3 loop.2.backedge loop.2 loop.3 > [ 0, loop.2.backedge ] -; CHECK-NEXT: < case2 loop.3 > [ 3, loop.3 ] +; CHECK: < loop.1.backedge, loop.1, loop.2, loop.3 > [ 1, loop.1 ] +; CHECK-NEXT: < case4, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 2, loop.1.backedge ] +; CHECK-NEXT: < case2, loop.1.backedge, state.1.be2.si.unfold.false, loop.1, loop.2, loop.3 > [ 4, loop.1.backedge ] +; CHECK-NEXT: < case4, loop.2.backedge, loop.2, loop.3 > [ 3, loop.2.backedge ] +; CHECK-NEXT: < case3, loop.2.backedge, loop.2, loop.3 > [ 0, loop.2.backedge ] +; CHECK-NEXT: < case2, loop.3 > [ 3, loop.3 ] entry: %cmp = icmp eq i32 %init, 0 br label %loop.1 diff --git a/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll b/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll index 92747629..cb7c46e 100644 --- a/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll +++ b/llvm/test/Transforms/DFAJumpThreading/max-path-length.ll @@ -9,9 +9,9 @@ ; too long so that it is not jump-threaded. define i32 @max_path_length(i32 %num) { ; CHECK-NOT: 3, case1 -; CHECK: < case2 for.inc for.body > [ 1, for.inc ] -; CHECK-NEXT: < for.inc for.body > [ 1, for.inc ] -; CHECK-NEXT: < case2 sel.si.unfold.false for.inc for.body > [ 2, sel.si.unfold.false ] +; CHECK: < case2, for.inc, for.body > [ 1, for.inc ] +; CHECK-NEXT: < for.inc, for.body > [ 1, for.inc ] +; CHECK-NEXT: < case2, sel.si.unfold.false, for.inc, for.body > [ 2, sel.si.unfold.false ] ; CHECK-NEXT: DFA-JT: Renaming non-local uses of: entry: br label %for.body diff --git a/llvm/test/Transforms/GVN/assume-equal.ll b/llvm/test/Transforms/GVN/assume-equal.ll index 0c922da..bbbc5c5 100644 --- a/llvm/test/Transforms/GVN/assume-equal.ll +++ b/llvm/test/Transforms/GVN/assume-equal.ll @@ -221,21 +221,22 @@ define i32 @_Z1ii(i32 %p) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[P]], 42 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: br i1 true, label %[[BB2:.*]], label %[[BB2]] -; CHECK: [[BB2]]: -; CHECK-NEXT: br i1 true, label %[[BB2]], label %[[BB2]] -; CHECK: [[BB0:.*:]] +; CHECK-NEXT: br i1 true, label %[[COMMON:.*]], label %[[COMMON]] +; CHECK: [[COMMON]]: +; CHECK-NEXT: br i1 true, label %[[COMMON]], label %[[COMMON]] +; CHECK: [[EXIT:.*:]] ; CHECK-NEXT: ret i32 42 ; entry: %cmp = icmp eq i32 %p, 42 call void @llvm.assume(i1 %cmp) - br i1 %cmp, label %bb2, label %bb2 -bb2: + br i1 %cmp, label %common, label %common +common: call void @llvm.assume(i1 true) - br i1 %cmp, label %bb2, label %bb2 + br i1 %cmp, label %common, label %common +exit: ret i32 %p } @@ -357,8 +358,8 @@ define i8 @assume_ptr_eq_different_prov_matters(ptr %p, ptr %p2) { ret i8 %v } -define i1 @assume_ptr_eq_different_prov_does_not_matter(ptr %p, ptr %p2) { -; CHECK-LABEL: define i1 @assume_ptr_eq_different_prov_does_not_matter( +define i1 @assume_ptr_eq_different_prov_does_not_matter_icmp(ptr %p, ptr %p2) { +; CHECK-LABEL: define i1 @assume_ptr_eq_different_prov_does_not_matter_icmp( ; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], [[P2]] ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) @@ -371,6 +372,36 @@ define i1 @assume_ptr_eq_different_prov_does_not_matter(ptr %p, ptr %p2) { ret i1 %c } +; This is not correct, as it may change the provenance exposed by ptrtoint. +; We still allow it for now. +define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint(ptr %p, ptr %p2) { +; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoint( +; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], [[P2]] +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[INT:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: ret i64 [[INT]] +; + %cmp = icmp eq ptr %p, %p2 + call void @llvm.assume(i1 %cmp) + %int = ptrtoint ptr %p2 to i64 + ret i64 %int +} + +define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr(ptr %p, ptr %p2) { +; CHECK-LABEL: define i64 @assume_ptr_eq_different_prov_does_not_matter_ptrtoaddr( +; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], [[P2]] +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[INT:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: ret i64 [[INT]] +; + %cmp = icmp eq ptr %p, %p2 + call void @llvm.assume(i1 %cmp) + %int = ptrtoaddr ptr %p2 to i64 + ret i64 %int +} + define i8 @assume_ptr_eq_same_prov(ptr %p, i64 %x) { ; CHECK-LABEL: define i8 @assume_ptr_eq_same_prov( ; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]]) { diff --git a/llvm/test/Transforms/GVN/ptrtoaddr.ll b/llvm/test/Transforms/GVN/ptrtoaddr.ll new file mode 100644 index 0000000..6d02bc6 --- /dev/null +++ b/llvm/test/Transforms/GVN/ptrtoaddr.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=gvn < %s | FileCheck %s + +define i64 @ptrtoaddr_same(ptr %p) { +; CHECK-LABEL: define i64 @ptrtoaddr_same( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[J:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: ret i64 0 +; + %i = ptrtoaddr ptr %p to i64 + %j = ptrtoaddr ptr %p to i64 + %sub = sub i64 %i, %j + ret i64 %sub +} + +; Note that unlike for ptrtoint, it's not possible for ptrtoaddr to differ +; in result type for the same input. +define i64 @ptrtoaddr_different(ptr %p, ptr %p2) { +; CHECK-LABEL: define i64 @ptrtoaddr_different( +; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: [[J:%.*]] = ptrtoaddr ptr [[P2]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[I]], [[J]] +; CHECK-NEXT: ret i64 [[SUB]] +; + %i = ptrtoaddr ptr %p to i64 + %j = ptrtoaddr ptr %p2 to i64 + %sub = sub i64 %i, %j + ret i64 %sub +} diff --git a/llvm/test/Transforms/InstCombine/fold-selective-shift.ll b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll new file mode 100644 index 0000000..2b22965 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll @@ -0,0 +1,323 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=instcombine %s -S | FileCheck %s + +declare void @clobber.i32(i32) + +define i16 @selective_shift_16(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: ret i16 [[SEL_V]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %upper.shl, %lower.zext + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i16 @selective_shift_16.commute(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.commute( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: ret i16 [[SEL_V]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %lower.zext, %upper.shl + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i16 @selective_shift_16.range(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.range( +; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.shl = shl nuw i32 %upper, 16 + %pack = or disjoint i32 %upper.shl, %lower + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i16 @selective_shift_16.range.commute(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.range.commute( +; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.shl = shl nuw i32 %upper, 16 + %pack = or disjoint i32 %lower, %upper.shl + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i32 @selective_shift_16.masked(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i32 @selective_shift_16.masked( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: [[SEL:%.*]] = zext i16 [[SEL_V]] to i32 +; CHECK-NEXT: ret i32 [[SEL]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %lower.zext, %upper.shl + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %sel.masked = and i32 %sel, 65535 + ret i32 %sel.masked +} + +define i32 @selective_shift_16.masked.commute(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i32 @selective_shift_16.masked.commute( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: [[SEL:%.*]] = zext i16 [[SEL_V]] to i32 +; CHECK-NEXT: ret i32 [[SEL]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %upper.shl, %lower.zext + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %sel.masked = and i32 %sel, 65535 + ret i32 %sel.masked +} + +define <2 x i16> @selective_shift.v16(<2 x i32> %mask, <2 x i16> %upper, <2 x i16> %lower) { +; CHECK-LABEL: define <2 x i16> @selective_shift.v16( +; CHECK-SAME: <2 x i32> [[MASK:%.*]], <2 x i16> [[UPPER:%.*]], <2 x i16> [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and <2 x i32> [[MASK]], splat (i32 16) +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq <2 x i32> [[MASK_BIT]], zeroinitializer +; CHECK-NEXT: [[SEL_V:%.*]] = select <2 x i1> [[MASK_BIT_Z]], <2 x i16> [[LOWER]], <2 x i16> [[UPPER]] +; CHECK-NEXT: ret <2 x i16> [[SEL_V]] +; + %upper.zext = zext <2 x i16> %upper to <2 x i32> + %upper.shl = shl nuw <2 x i32> %upper.zext, splat(i32 16) + %lower.zext = zext <2 x i16> %lower to <2 x i32> + %pack = or disjoint <2 x i32> %upper.shl, %lower.zext + %mask.bit = and <2 x i32> %mask, splat(i32 16) + %sel = lshr <2 x i32> %pack, %mask.bit + %trunc = trunc <2 x i32> %sel to <2 x i16> + ret <2 x i16> %trunc +} + +define i16 @selective_shift_16.wide(i64 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.wide( +; CHECK-SAME: i64 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: ret i16 [[SEL_V]] +; + %upper.zext = zext i16 %upper to i64 + %upper.shl = shl nuw i64 %upper.zext, 16 + %lower.zext = zext i16 %lower to i64 + %pack = or disjoint i64 %upper.shl, %lower.zext + %mask.bit = and i64 %mask, 16 + %sel = lshr i64 %pack, %mask.bit + %trunc = trunc i64 %sel to i16 + ret i16 %trunc +} + +; narrow zext type blocks fold +define i16 @selective_shift_16.narrow(i24 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.narrow( +; CHECK-SAME: i24 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i24 +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl i24 [[UPPER_ZEXT]], 16 +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i24 +; CHECK-NEXT: [[PACK:%.*]] = or disjoint i24 [[UPPER_SHL]], [[LOWER_ZEXT]] +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i24 [[MASK]], 16 +; CHECK-NEXT: [[SEL:%.*]] = lshr i24 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i24 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.zext = zext i16 %upper to i24 + %upper.shl = shl i24 %upper.zext, 16 + %lower.zext = zext i16 %lower to i24 + %pack = or disjoint i24 %upper.shl, %lower.zext + %mask.bit = and i24 %mask, 16 + %sel = lshr i24 %pack, %mask.bit + %trunc = trunc i24 %sel to i16 + ret i16 %trunc +} + +; %lower's upper bits block fold +define i16 @selective_shift_16_norange(i32 %mask, i32 %upper, i32 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16_norange( +; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER]], 16 +; CHECK-NEXT: [[PACK:%.*]] = or i32 [[UPPER_SHL]], [[LOWER]] +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.shl = shl nuw i32 %upper, 16 + %pack = or i32 %upper.shl, %lower + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +define i16 @selective_shift_16.mu.0(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.mu.0( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32 +; CHECK-NEXT: call void @clobber.i32(i32 [[UPPER_ZEXT]]) +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32 +; CHECK-NEXT: call void @clobber.i32(i32 [[LOWER_ZEXT]]) +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0 +; CHECK-NEXT: [[TRUNC:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]] +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.zext = zext i16 %upper to i32 + call void @clobber.i32(i32 %upper.zext) + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + call void @clobber.i32(i32 %lower.zext) + %pack = or disjoint i32 %upper.shl, %lower.zext + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +; multi-use of %pack blocks fold +define i16 @selective_shift_16.mu.1(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.mu.1( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32 +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16 +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32 +; CHECK-NEXT: [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]] +; CHECK-NEXT: call void @clobber.i32(i32 [[PACK]]) +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %upper.shl, %lower.zext + call void @clobber.i32(i32 %pack) + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +; non-truncated use of %sel blocks fold +define i16 @selective_shift_16.mu.2(i32 %mask, i16 %upper, i16 %lower) { +; CHECK-LABEL: define i16 @selective_shift_16.mu.2( +; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32 +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16 +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32 +; CHECK-NEXT: [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]] +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16 +; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: call void @clobber.i32(i32 [[SEL]]) +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16 +; CHECK-NEXT: ret i16 [[TRUNC]] +; + %upper.zext = zext i16 %upper to i32 + %upper.shl = shl nuw i32 %upper.zext, 16 + %lower.zext = zext i16 %lower to i32 + %pack = or disjoint i32 %upper.shl, %lower.zext + %mask.bit = and i32 %mask, 16 + %sel = lshr i32 %pack, %mask.bit + call void @clobber.i32(i32 %sel) + %trunc = trunc i32 %sel to i16 + ret i16 %trunc +} + +; bitwidth must be a power of 2 to fold +define i24 @selective_shift_24(i48 %mask, i24 %upper, i24 %lower) { +; CHECK-LABEL: define i24 @selective_shift_24( +; CHECK-SAME: i48 [[MASK:%.*]], i24 [[UPPER:%.*]], i24 [[LOWER:%.*]]) { +; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i24 [[UPPER]] to i48 +; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i48 [[UPPER_ZEXT]], 24 +; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i24 [[LOWER]] to i48 +; CHECK-NEXT: [[PACK:%.*]] = or disjoint i48 [[UPPER_SHL]], [[LOWER_ZEXT]] +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i48 [[MASK]], 24 +; CHECK-NEXT: [[SEL:%.*]] = lshr i48 [[PACK]], [[MASK_BIT]] +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i48 [[SEL]] to i24 +; CHECK-NEXT: ret i24 [[TRUNC]] +; + %upper.zext = zext i24 %upper to i48 + %upper.shl = shl nuw i48 %upper.zext, 24 + %lower.zext = zext i24 %lower to i48 + %pack = or disjoint i48 %upper.shl, %lower.zext + %mask.bit = and i48 %mask, 24 + %sel = lshr i48 %pack, %mask.bit + %trunc = trunc i48 %sel to i24 + ret i24 %trunc +} + +define i32 @selective_shift_32(i64 %mask, i32 %upper, i32 %lower) { +; CHECK-LABEL: define i32 @selective_shift_32( +; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]] +; CHECK-NEXT: ret i32 [[SEL_V]] +; + %upper.zext = zext i32 %upper to i64 + %upper.shl = shl nuw i64 %upper.zext, 32 + %lower.zext = zext i32 %lower to i64 + %pack = or disjoint i64 %upper.shl, %lower.zext + %mask.bit = and i64 %mask, 32 + %sel = lshr i64 %pack, %mask.bit + %trunc = trunc i64 %sel to i32 + ret i32 %trunc +} + +define i32 @selective_shift_32.commute(i64 %mask, i32 %upper, i32 %lower) { +; CHECK-LABEL: define i32 @selective_shift_32.commute( +; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) { +; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32 +; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0 +; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]] +; CHECK-NEXT: ret i32 [[SEL_V]] +; + %upper.zext = zext i32 %upper to i64 + %upper.shl = shl nuw i64 %upper.zext, 32 + %lower.zext = zext i32 %lower to i64 + %pack = or disjoint i64 %lower.zext, %upper.shl + %mask.bit = and i64 %mask, 32 + %sel = lshr i64 %pack, %mask.bit + %trunc = trunc i64 %sel to i32 + ret i32 %trunc +} diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll index 61b1331..49b9b7e 100644 --- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll +++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll @@ -1,6 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s -target datalayout = "p1:64:64:64:32" + +; The ptrtoaddr folds are also valid for pointers that have external state. +target datalayout = "pe1:64:64:64:32" + +@g = external global i8 +@g2 = external global i8 + +@g.as1 = external addrspace(1) global i8 +@g2.as1 = external addrspace(1) global i8 define i32 @ptrtoaddr_inttoptr_arg(i32 %a) { ; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr_arg( @@ -24,14 +32,14 @@ define i32 @ptrtoaddr_inttoptr() { define i32 @ptrtoaddr_inttoptr_diff_size1() { ; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr_diff_size1() { -; CHECK-NEXT: ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i64 -1 to ptr addrspace(1)) to i32) +; CHECK-NEXT: ret i32 -1 ; ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i64 -1 to ptr addrspace(1)) to i32) } define i32 @ptrtoaddr_inttoptr_diff_size2() { ; CHECK-LABEL: define i32 @ptrtoaddr_inttoptr_diff_size2() { -; CHECK-NEXT: ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i16 -1 to ptr addrspace(1)) to i32) +; CHECK-NEXT: ret i32 65535 ; ret i32 ptrtoaddr (ptr addrspace(1) inttoptr (i16 -1 to ptr addrspace(1)) to i32) } @@ -52,14 +60,52 @@ define i64 @ptr2addr2_inttoptr_noas2() { define i64 @ptrtoaddr_inttoptr_noas_diff_size1() { ; CHECK-LABEL: define i64 @ptrtoaddr_inttoptr_noas_diff_size1() { -; CHECK-NEXT: ret i64 ptrtoaddr (ptr inttoptr (i32 -1 to ptr) to i64) +; CHECK-NEXT: ret i64 4294967295 ; ret i64 ptrtoaddr (ptr inttoptr (i32 -1 to ptr) to i64) } define i64 @ptrtoaddr_inttoptr_noas_diff_size2() { ; CHECK-LABEL: define i64 @ptrtoaddr_inttoptr_noas_diff_size2() { -; CHECK-NEXT: ret i64 ptrtoaddr (ptr inttoptr (i128 -1 to ptr) to i64) +; CHECK-NEXT: ret i64 -1 ; ret i64 ptrtoaddr (ptr inttoptr (i128 -1 to ptr) to i64) } + +define i64 @ptrtoaddr_gep_null() { +; CHECK-LABEL: define i64 @ptrtoaddr_gep_null() { +; CHECK-NEXT: ret i64 42 +; + ret i64 ptrtoaddr (ptr getelementptr (i8, ptr null, i64 42) to i64) +} + +define i32 @ptrtoaddr_gep_null_addrsize() { +; CHECK-LABEL: define i32 @ptrtoaddr_gep_null_addrsize() { +; CHECK-NEXT: ret i32 42 +; + ret i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i32 42) to i32) +} + +define i64 @ptrtoaddr_gep_sub() { +; CHECK-LABEL: define i64 @ptrtoaddr_gep_sub() { +; CHECK-NEXT: ret i64 sub (i64 ptrtoaddr (ptr @g to i64), i64 ptrtoaddr (ptr @g2 to i64)) +; + ret i64 ptrtoaddr (ptr getelementptr (i8, ptr @g, i64 sub (i64 0, i64 ptrtoaddr (ptr @g2 to i64))) to i64) +} + +define i32 @ptrtoaddr_gep_sub_addrsize() { +; CHECK-LABEL: define i32 @ptrtoaddr_gep_sub_addrsize() { +; CHECK-NEXT: ret i32 sub (i32 ptrtoaddr (ptr addrspace(1) @g.as1 to i32), i32 ptrtoaddr (ptr addrspace(1) @g2.as1 to i32)) +; + ret i32 ptrtoaddr (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) @g.as1, i32 sub (i32 0, i32 ptrtoaddr (ptr addrspace(1) @g2.as1 to i32))) to i32) +} + +; Don't fold inttoptr of ptrtoaddr away. inttoptr will pick a previously +; exposed provenance, which is not necessarily that of @g (especially as +; ptrtoaddr does not expose the provenance.) +define ptr @inttoptr_of_ptrtoaddr() { +; CHECK-LABEL: define ptr @inttoptr_of_ptrtoaddr() { +; CHECK-NEXT: ret ptr inttoptr (i64 ptrtoaddr (ptr @g to i64) to ptr) +; + ret ptr inttoptr (i64 ptrtoaddr (ptr @g to i64) to ptr) +} diff --git a/llvm/test/Transforms/LICM/vector-intrinsics.ll b/llvm/test/Transforms/LICM/vector-intrinsics.ll new file mode 100644 index 0000000..351773e --- /dev/null +++ b/llvm/test/Transforms/LICM/vector-intrinsics.ll @@ -0,0 +1,176 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes='loop-mssa(licm)' -verify-memoryssa %s | FileCheck %s + +define i32 @reduce_umax(<2 x i32> %inv, i1 %c) { +; CHECK-LABEL: define i32 @reduce_umax( +; CHECK-SAME: <2 x i32> [[INV:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[REDUCE_UMAX:%.*]] = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> [[INV]]) +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], [[REDUCE_UMAX]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C]], i1 [[BACKEDGE_COND]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %reduce.umax = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %inv) + %backedge.cond = icmp ult i32 %iv, %reduce.umax + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_umax(<2 x i32> %inv.l, <2 x i32> %inv.r, i1 %c) { +; CHECK-LABEL: define i32 @vp_umax( +; CHECK-SAME: <2 x i32> [[INV_L:%.*]], <2 x i32> [[INV_R:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[VP_UMAX:%.*]] = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> [[INV_L]], <2 x i32> [[INV_R]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_UMAX]], i32 0 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]] +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C]], i1 [[BACKEDGE_COND]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %vp.umax = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> %inv.l, <2 x i32> %inv.r, <2 x i1> splat (i1 1), i32 2) + %extract = extractelement <2 x i32> %vp.umax, i32 0 + %backedge.cond = icmp ult i32 %iv, %extract + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_udiv(<2 x i32> %inv.q, <2 x i32> %inv.d, i1 %c) { +; CHECK-LABEL: define i32 @vp_udiv( +; CHECK-SAME: <2 x i32> [[INV_Q:%.*]], <2 x i32> [[INV_D:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]] +; CHECK: [[COND_TRUE]]: +; CHECK-NEXT: [[VP_UDIV:%.*]] = call <2 x i32> @llvm.vp.udiv.v2i32(<2 x i32> [[INV_Q]], <2 x i32> [[INV_D]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_UDIV]], i32 0 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %vp.udiv = call <2 x i32> @llvm.vp.udiv.v2i32(<2 x i32> %inv.q, <2 x i32> %inv.d, <2 x i1> splat (i1 1), i32 2) + %extract = extractelement <2 x i32> %vp.udiv, i32 0 + %backedge.cond = icmp ult i32 %iv, %extract + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_load(ptr %inv, i1 %c) { +; CHECK-LABEL: define i32 @vp_load( +; CHECK-SAME: ptr [[INV:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]] +; CHECK: [[COND_TRUE]]: +; CHECK-NEXT: [[VP_LOAD:%.*]] = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr [[INV]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x i32> [[VP_LOAD]], i32 0 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[EXTRACT]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label %[[LOOP]], label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + %vp.load = call <2 x i32> @llvm.vp.load.v2i32(ptr %inv, <2 x i1> splat (i1 1), i32 2) + %extract = extractelement <2 x i32> %vp.load, i32 0 + %backedge.cond = icmp ult i32 %iv, %extract + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} + +define i32 @vp_store(<2 x i32> %inv.v, ptr %inv.p, i1 %c) { +; CHECK-LABEL: define i32 @vp_store( +; CHECK-SAME: <2 x i32> [[INV_V:%.*]], ptr [[INV_P:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[COND_TRUE:.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[C]], label %[[COND_TRUE]], label %[[EXIT:.*]] +; CHECK: [[COND_TRUE]]: +; CHECK-NEXT: call void @llvm.vp.store.v2i32.p0(<2 x i32> [[INV_V]], ptr [[INV_P]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[BACKEDGE_COND:%.*]] = icmp ult i32 [[IV]], 10 +; CHECK-NEXT: br i1 [[BACKEDGE_COND]], label %[[LOOP]], label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i32 [ [[IV]], %[[COND_TRUE]] ], [ [[IV]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[IV_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %cond.true ] + %iv.next = add i32 %iv, 1 + br i1 %c, label %cond.true, label %exit + +cond.true: + call void @llvm.vp.store.v2i32(<2 x i32> %inv.v, ptr %inv.p, <2 x i1> splat (i1 1), i32 2) + %backedge.cond = icmp ult i32 %iv, 10 + br i1 %backedge.cond, label %loop, label %exit + +exit: + ret i32 %iv +} diff --git a/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll b/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll deleted file mode 100644 index 72bc543..0000000 --- a/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll +++ /dev/null @@ -1,164 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S < %s -passes='loop(loop-rotate)' -loop-rotate-multi=true | FileCheck %s - -; Test loop rotation with multiple exits, some of them - deoptimizing. -; We should end up with a latch which exit is non-deoptimizing, so we should rotate -; more than once. - -declare i32 @llvm.experimental.deoptimize.i32(...) - -define i32 @test_cond_with_one_deopt_exit(ptr nonnull %a, i64 %x) { -; Rotation done twice. -; Latch should be at the 2nd condition (for.cond2), exiting to %return. -; -; CHECK-LABEL: @test_cond_with_one_deopt_exit( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[VAL_A_IDX3:%.*]] = load i32, ptr %a, align 4 -; CHECK-NEXT: [[ZERO_CHECK4:%.*]] = icmp eq i32 [[VAL_A_IDX3]], 0 -; CHECK-NEXT: br i1 [[ZERO_CHECK4]], label %deopt.exit, label %for.cond2.lr.ph -; CHECK: for.cond2.lr.ph: -; CHECK-NEXT: [[FOR_CHECK8:%.*]] = icmp ult i64 0, %x -; CHECK-NEXT: br i1 [[FOR_CHECK8]], label %for.body.lr.ph, label %return -; CHECK: for.body.lr.ph: -; CHECK-NEXT: br label %for.body -; CHECK: for.cond2: -; CHECK: [[FOR_CHECK:%.*]] = icmp ult i64 {{%.*}}, %x -; CHECK-NEXT: br i1 [[FOR_CHECK]], label %for.body, label %for.cond2.return_crit_edge -; CHECK: for.body: -; CHECK: br label %for.tail -; CHECK: for.tail: -; CHECK: [[VAL_A_IDX:%.*]] = load i32, ptr -; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[VAL_A_IDX]], 0 -; CHECK-NEXT: br i1 [[ZERO_CHECK]], label %for.cond1.deopt.exit_crit_edge, label %for.cond2 -; CHECK: for.cond2.return_crit_edge: -; CHECK-NEXT: {{%.*}} = phi i32 -; CHECK-NEXT: br label %return -; CHECK: return: -; CHECK-NEXT: [[SUM_LCSSA2:%.*]] = phi i32 -; CHECK-NEXT: ret i32 [[SUM_LCSSA2]] -; CHECK: for.cond1.deopt.exit_crit_edge: -; CHECK-NEXT: {{%.*}} = phi i32 -; CHECK-NEXT: br label %deopt.exit -; CHECK: deopt.exit: -; CHECK: [[DEOPT_VAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 {{%.*}}) ] -; CHECK-NEXT: ret i32 [[DEOPT_VAL]] -; -entry: - br label %for.cond1 - -for.cond1: - %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.tail ] - %sum = phi i32 [ 0, %entry ], [ %sum.next, %for.tail ] - %a.idx = getelementptr inbounds i32, ptr %a, i64 %idx - %val.a.idx = load i32, ptr %a.idx, align 4 - %zero.check = icmp eq i32 %val.a.idx, 0 - br i1 %zero.check, label %deopt.exit, label %for.cond2 - -for.cond2: - %for.check = icmp ult i64 %idx, %x - br i1 %for.check, label %for.body, label %return - -for.body: - br label %for.tail - -for.tail: - %sum.next = add i32 %sum, %val.a.idx - %idx.next = add nuw nsw i64 %idx, 1 - br label %for.cond1 - -return: - ret i32 %sum - -deopt.exit: - %deopt.val = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %val.a.idx) ] - ret i32 %deopt.val -} - -define i32 @test_cond_with_two_deopt_exits(ptr nonnull %a, i64 %x) { -; Rotation done three times. -; Latch should be at the 3rd condition (for.cond3), exiting to %return. -; -; CHECK-LABEL: @test_cond_with_two_deopt_exits( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_IDX_DEREF4:%.*]] = load ptr, ptr %a -; CHECK-NEXT: [[NULL_CHECK5:%.*]] = icmp eq ptr [[A_IDX_DEREF4]], null -; CHECK-NEXT: br i1 [[NULL_CHECK5]], label %deopt.exit1, label %for.cond2.lr.ph -; CHECK: for.cond2.lr.ph: -; CHECK-NEXT: [[VAL_A_IDX9:%.*]] = load i32, ptr [[A_IDX_DEREF4]], align 4 -; CHECK-NEXT: [[ZERO_CHECK10:%.*]] = icmp eq i32 [[VAL_A_IDX9]], 0 -; CHECK-NEXT: br i1 [[ZERO_CHECK10]], label %deopt.exit2, label %for.cond3.lr.ph -; CHECK: for.cond3.lr.ph: -; CHECK-NEXT: [[FOR_CHECK14:%.*]] = icmp ult i64 0, %x -; CHECK-NEXT: br i1 [[FOR_CHECK14]], label %for.body.lr.ph, label %return -; CHECK: for.body.lr.ph: -; CHECK-NEXT: br label %for.body -; CHECK: for.cond2: -; CHECK: [[VAL_A_IDX:%.*]] = load i32, ptr -; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[VAL_A_IDX]], 0 -; CHECK-NEXT: br i1 [[ZERO_CHECK]], label %for.cond2.deopt.exit2_crit_edge, label %for.cond3 -; CHECK: for.cond3: -; CHECK: [[FOR_CHECK:%.*]] = icmp ult i64 {{%.*}}, %x -; CHECK-NEXT: br i1 [[FOR_CHECK]], label %for.body, label %for.cond3.return_crit_edge -; CHECK: for.body: -; CHECK: br label %for.tail -; CHECK: for.tail: -; CHECK: [[IDX_NEXT:%.*]] = add nuw nsw i64 {{%.*}}, 1 -; CHECK: [[NULL_CHECK:%.*]] = icmp eq ptr {{%.*}}, null -; CHECK-NEXT: br i1 [[NULL_CHECK]], label %for.cond1.deopt.exit1_crit_edge, label %for.cond2 -; CHECK: for.cond3.return_crit_edge: -; CHECK-NEXT: [[SPLIT18:%.*]] = phi i32 -; CHECK-NEXT: br label %return -; CHECK: return: -; CHECK-NEXT: [[SUM_LCSSA2:%.*]] = phi i32 -; CHECK-NEXT: ret i32 [[SUM_LCSSA2]] -; CHECK: for.cond1.deopt.exit1_crit_edge: -; CHECK-NEXT: br label %deopt.exit1 -; CHECK: deopt.exit1: -; CHECK-NEXT: [[DEOPT_VAL1:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0) ] -; CHECK-NEXT: ret i32 [[DEOPT_VAL1]] -; CHECK: for.cond2.deopt.exit2_crit_edge: -; CHECK-NEXT: [[SPLIT:%.*]] = phi i32 -; CHECK-NEXT: br label %deopt.exit2 -; CHECK: deopt.exit2: -; CHECK-NEXT: [[VAL_A_IDX_LCSSA:%.*]] = phi i32 -; CHECK-NEXT: [[DEOPT_VAL2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[VAL_A_IDX_LCSSA]]) ] -; CHECK-NEXT: ret i32 [[DEOPT_VAL2]] -; -entry: - br label %for.cond1 - -for.cond1: - %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.tail ] - %sum = phi i32 [ 0, %entry ], [ %sum.next, %for.tail ] - %a.idx = getelementptr inbounds ptr, ptr %a, i64 %idx - %a.idx.deref = load ptr, ptr %a.idx - %null.check = icmp eq ptr %a.idx.deref, null - br i1 %null.check, label %deopt.exit1, label %for.cond2 - -for.cond2: - %val.a.idx = load i32, ptr %a.idx.deref, align 4 - %zero.check = icmp eq i32 %val.a.idx, 0 - br i1 %zero.check, label %deopt.exit2, label %for.cond3 - -for.cond3: - %for.check = icmp ult i64 %idx, %x - br i1 %for.check, label %for.body, label %return - -for.body: - br label %for.tail - -for.tail: - %sum.next = add i32 %sum, %val.a.idx - %idx.next = add nuw nsw i64 %idx, 1 - br label %for.cond1 - -return: - ret i32 %sum - -deopt.exit1: - %deopt.val1 = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0) ] - ret i32 %deopt.val1 -deopt.exit2: - %deopt.val2 = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %val.a.idx) ] - ret i32 %deopt.val2 -} diff --git a/llvm/test/Transforms/LoopRotate/multiple-exits.ll b/llvm/test/Transforms/LoopRotate/multiple-exits.ll deleted file mode 100644 index 748700c..0000000 --- a/llvm/test/Transforms/LoopRotate/multiple-exits.ll +++ /dev/null @@ -1,236 +0,0 @@ -; RUN: opt -S -passes=loop-rotate < %s -verify-loop-info -verify-dom-info -verify-memoryssa | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.8.0" - -; PR7447 -define i32 @test1(ptr nocapture %a) nounwind readonly { -entry: - br label %for.cond - -for.cond: ; preds = %for.cond1, %entry - %sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.cond1 ] - %i.0 = phi i1 [ true, %entry ], [ false, %for.cond1 ] - br i1 %i.0, label %for.cond1, label %return - -for.cond1: ; preds = %for.cond, %land.rhs - %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.0, %for.cond ] - %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond ] - %cmp2 = icmp ult i32 %i.1, 100 - br i1 %cmp2, label %land.rhs, label %for.cond - -land.rhs: ; preds = %for.cond1 - %conv = zext i32 %i.1 to i64 - %arrayidx = getelementptr inbounds [100 x i32], ptr %a, i64 0, i64 %conv - %0 = load i32, ptr %arrayidx, align 4 - %add = add i32 %0, %sum.1 - %cmp4 = icmp ugt i32 %add, 1000 - %inc = add i32 %i.1, 1 - br i1 %cmp4, label %return, label %for.cond1 - -return: ; preds = %for.cond, %land.rhs - %retval.0 = phi i32 [ 1000, %land.rhs ], [ %sum.0, %for.cond ] - ret i32 %retval.0 - -; CHECK-LABEL: @test1( -; CHECK: for.cond1.preheader: -; CHECK: %sum.04 = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.cond.loopexit ] -; CHECK: br label %for.cond1 - -; CHECK: for.cond1: -; CHECK: %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.04, %for.cond1.preheader ] -; CHECK: %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond1.preheader ] -; CHECK: %cmp2 = icmp ult i32 %i.1, 100 -; CHECK: br i1 %cmp2, label %land.rhs, label %for.cond.loopexit -} - -define void @test2(i32 %x) nounwind { -entry: - br label %for.cond - -for.cond: ; preds = %if.end, %entry - %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ] - %cmp = icmp eq i32 %i.0, %x - br i1 %cmp, label %return.loopexit, label %for.body - -for.body: ; preds = %for.cond - %call = tail call i32 @foo(i32 %i.0) nounwind - %tobool = icmp eq i32 %call, 0 - br i1 %tobool, label %if.end, label %a - -if.end: ; preds = %for.body - %call1 = tail call i32 @foo(i32 42) nounwind - %inc = add i32 %i.0, 1 - br label %for.cond - -a: ; preds = %for.body - %call2 = tail call i32 @bar(i32 1) nounwind - br label %return - -return.loopexit: ; preds = %for.cond - br label %return - -return: ; preds = %return.loopexit, %a - ret void - -; CHECK-LABEL: @test2( -; CHECK: if.end: -; CHECK: %inc = add i32 %i.02, 1 -; CHECK: %cmp = icmp eq i32 %inc, %x -; CHECK: br i1 %cmp, label %for.cond.return.loopexit_crit_edge, label %for.body -} - -declare i32 @foo(i32) - -declare i32 @bar(i32) - -@_ZTIi = external constant ptr - -; Verify dominators. -define void @test3(i32 %x) personality ptr @__gxx_personality_v0 { -entry: - %cmp2 = icmp eq i32 0, %x - br i1 %cmp2, label %try.cont.loopexit, label %for.body.lr.ph - -for.body.lr.ph: ; preds = %entry - br label %for.body - -for.body: ; preds = %for.body.lr.ph, %for.inc - %i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] - invoke void @_Z3fooi(i32 %i.03) - to label %for.inc unwind label %lpad - -for.inc: ; preds = %for.body - %inc = add i32 %i.03, 1 - %cmp = icmp eq i32 %inc, %x - br i1 %cmp, label %for.cond.try.cont.loopexit_crit_edge, label %for.body - -lpad: ; preds = %for.body - %0 = landingpad { ptr, i32 } - catch ptr @_ZTIi - %1 = extractvalue { ptr, i32 } %0, 0 - %2 = extractvalue { ptr, i32 } %0, 1 - %3 = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIi) nounwind - %matches = icmp eq i32 %2, %3 - br i1 %matches, label %catch, label %eh.resume - -catch: ; preds = %lpad - %4 = tail call ptr @__cxa_begin_catch(ptr %1) nounwind - br i1 true, label %invoke.cont2.loopexit, label %for.body.i.lr.ph - -for.body.i.lr.ph: ; preds = %catch - br label %for.body.i - -for.body.i: ; preds = %for.body.i.lr.ph, %for.inc.i - %i.0.i1 = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc.i, %for.inc.i ] - invoke void @_Z3fooi(i32 %i.0.i1) - to label %for.inc.i unwind label %lpad.i - -for.inc.i: ; preds = %for.body.i - %inc.i = add i32 %i.0.i1, 1 - %cmp.i = icmp eq i32 %inc.i, 0 - br i1 %cmp.i, label %for.cond.i.invoke.cont2.loopexit_crit_edge, label %for.body.i - -lpad.i: ; preds = %for.body.i - %5 = landingpad { ptr, i32 } - catch ptr @_ZTIi - %6 = extractvalue { ptr, i32 } %5, 0 - %7 = extractvalue { ptr, i32 } %5, 1 - %matches.i = icmp eq i32 %7, %3 - br i1 %matches.i, label %catch.i, label %lpad1.body - -catch.i: ; preds = %lpad.i - %8 = tail call ptr @__cxa_begin_catch(ptr %6) nounwind - invoke void @test3(i32 0) - to label %invoke.cont2.i unwind label %lpad1.i - -invoke.cont2.i: ; preds = %catch.i - tail call void @__cxa_end_catch() nounwind - br label %invoke.cont2 - -lpad1.i: ; preds = %catch.i - %9 = landingpad { ptr, i32 } - cleanup - %10 = extractvalue { ptr, i32 } %9, 0 - %11 = extractvalue { ptr, i32 } %9, 1 - tail call void @__cxa_end_catch() nounwind - br label %lpad1.body - -for.cond.i.invoke.cont2.loopexit_crit_edge: ; preds = %for.inc.i - br label %invoke.cont2.loopexit - -invoke.cont2.loopexit: ; preds = %for.cond.i.invoke.cont2.loopexit_crit_edge, %catch - br label %invoke.cont2 - -invoke.cont2: ; preds = %invoke.cont2.loopexit, %invoke.cont2.i - tail call void @__cxa_end_catch() nounwind - br label %try.cont - -for.cond.try.cont.loopexit_crit_edge: ; preds = %for.inc - br label %try.cont.loopexit - -try.cont.loopexit: ; preds = %for.cond.try.cont.loopexit_crit_edge, %entry - br label %try.cont - -try.cont: ; preds = %try.cont.loopexit, %invoke.cont2 - ret void - -lpad1.body: ; preds = %lpad1.i, %lpad.i - %exn.slot.0.i = phi ptr [ %10, %lpad1.i ], [ %6, %lpad.i ] - %ehselector.slot.0.i = phi i32 [ %11, %lpad1.i ], [ %7, %lpad.i ] - tail call void @__cxa_end_catch() nounwind - br label %eh.resume - -eh.resume: ; preds = %lpad1.body, %lpad - %exn.slot.0 = phi ptr [ %exn.slot.0.i, %lpad1.body ], [ %1, %lpad ] - %ehselector.slot.0 = phi i32 [ %ehselector.slot.0.i, %lpad1.body ], [ %2, %lpad ] - %lpad.val = insertvalue { ptr, i32 } undef, ptr %exn.slot.0, 0 - %lpad.val5 = insertvalue { ptr, i32 } %lpad.val, i32 %ehselector.slot.0, 1 - resume { ptr, i32 } %lpad.val5 -} - -declare void @_Z3fooi(i32) - -declare i32 @__gxx_personality_v0(...) - -declare i32 @llvm.eh.typeid.for(ptr) nounwind readnone - -declare ptr @__cxa_begin_catch(ptr) - -declare void @__cxa_end_catch() - -define void @test4(i1 %arg) nounwind uwtable { -entry: - br label %"7" - -"3": ; preds = %"7" - br i1 %arg, label %"31", label %"4" - -"4": ; preds = %"3" - %. = select i1 undef, float 0x3F50624DE0000000, float undef - %0 = add i32 %1, 1 - br label %"7" - -"7": ; preds = %"4", %entry - %1 = phi i32 [ %0, %"4" ], [ 0, %entry ] - %2 = icmp slt i32 %1, 100 - br i1 %2, label %"3", label %"8" - -"8": ; preds = %"7" - br i1 %arg, label %"9", label %"31" - -"9": ; preds = %"8" - br label %"33" - -"27": ; preds = %"31" - unreachable - -"31": ; preds = %"8", %"3" - br i1 %arg, label %"27", label %"32" - -"32": ; preds = %"31" - br label %"33" - -"33": ; preds = %"32", %"9" - ret void -} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll index 649e34e..7548bf6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll @@ -45,9 +45,6 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly ; CHECK-UF4-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 8) ; CHECK-UF4-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 4) ; CHECK-UF4-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 4, i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY2:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 8, i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 12, i64 [[N]]) ; CHECK-UF4-NEXT: br label [[VECTOR_BODY1:%.*]] ; CHECK-UF4: vector.body: ; CHECK-UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY1]] ] @@ -67,17 +64,11 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP18]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK5]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP19]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK6]]) ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 -; CHECK-UF4-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 4 -; CHECK-UF4-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 8 -; CHECK-UF4-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 12 ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; CHECK-UF4-NEXT: [[TMP12]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 12) ; CHECK-UF4-NEXT: [[TMP11]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 8) ; CHECK-UF4-NEXT: [[TMP10]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 4) ; CHECK-UF4-NEXT: [[TMP9]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT7:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP13]], i64 [[TMP6]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT8:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP14]], i64 [[TMP6]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT9:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP15]], i64 [[TMP6]]) ; CHECK-UF4-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0 ; CHECK-UF4-NEXT: [[TMP20:%.*]] = xor i1 [[TMP21]], true ; CHECK-UF4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll index 5ee4e9e..75acbea9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-wide-lane-mask.ll @@ -46,23 +46,11 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF4-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP3]] ; CHECK-UF4-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP3]] ; CHECK-UF4-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 -; CHECK-UF4-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 4 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP11]] -; CHECK-UF4-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 5 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP13]] -; CHECK-UF4-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 48 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP15]] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 0, i64 [[N]]) ; CHECK-UF4-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 48) ; CHECK-UF4-NEXT: [[TMP18:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 32) ; CHECK-UF4-NEXT: [[TMP17:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 16) ; CHECK-UF4-NEXT: [[TMP16:%.*]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]]) ; CHECK-UF4-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UF4: vector.body: ; CHECK-UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -103,23 +91,11 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src, ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP27]], ptr [[TMP42]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK7]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP28]], ptr [[TMP45]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK8]]) ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP62]] -; CHECK-UF4-NEXT: [[TMP46:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP47:%.*]] = shl nuw i64 [[TMP46]], 4 -; CHECK-UF4-NEXT: [[TMP48:%.*]] = add i64 [[INDEX]], [[TMP47]] -; CHECK-UF4-NEXT: [[TMP49:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP50:%.*]] = shl nuw i64 [[TMP49]], 5 -; CHECK-UF4-NEXT: [[TMP51:%.*]] = add i64 [[INDEX]], [[TMP50]] -; CHECK-UF4-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP53:%.*]] = mul nuw i64 [[TMP52]], 48 -; CHECK-UF4-NEXT: [[TMP54:%.*]] = add i64 [[INDEX]], [[TMP53]] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 [[INDEX]], i64 [[TMP9]]) ; CHECK-UF4-NEXT: [[TMP58]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 48) ; CHECK-UF4-NEXT: [[TMP57]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 32) ; CHECK-UF4-NEXT: [[TMP56]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 16) ; CHECK-UF4-NEXT: [[TMP55]] = call <vscale x 16 x i1> @llvm.vector.extract.nxv16i1.nxv64i1(<vscale x 64 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT12:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP48]], i64 [[TMP9]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT13:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP51]], i64 [[TMP9]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT14:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP54]], i64 [[TMP9]]) ; CHECK-UF4-NEXT: [[TMP59:%.*]] = extractelement <vscale x 16 x i1> [[TMP55]], i32 0 ; CHECK-UF4-NEXT: [[TMP60:%.*]] = xor i1 [[TMP59]], true ; CHECK-UF4-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -191,23 +167,11 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF4-NEXT: [[TMP31:%.*]] = sub i64 [[N]], [[TMP26]] ; CHECK-UF4-NEXT: [[TMP56:%.*]] = icmp ugt i64 [[N]], [[TMP26]] ; CHECK-UF4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = select i1 [[TMP56]], i64 [[TMP31]], i64 0 -; CHECK-UF4-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP6]] -; CHECK-UF4-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP8]] -; CHECK-UF4-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 6 -; CHECK-UF4-NEXT: [[INDEX_PART_NEXT2:%.*]] = add i64 0, [[TMP10]] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[N]]) ; CHECK-UF4-NEXT: [[TMP14:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 6) ; CHECK-UF4-NEXT: [[TMP13:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 4) ; CHECK-UF4-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 2) ; CHECK-UF4-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_ENTRY]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY3:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT]], i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY4:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT1]], i64 [[N]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_ENTRY5:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_PART_NEXT2]], i64 [[N]]) ; CHECK-UF4-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-UF4: vector.body: ; CHECK-UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -248,23 +212,11 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP18]], ptr [[TMP37]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK7]]) ; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP19]], ptr [[TMP40]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK8]]) ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]] -; CHECK-UF4-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP42:%.*]] = shl nuw i64 [[TMP41]], 1 -; CHECK-UF4-NEXT: [[TMP43:%.*]] = add i64 [[INDEX]], [[TMP42]] -; CHECK-UF4-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP45:%.*]] = shl nuw i64 [[TMP44]], 2 -; CHECK-UF4-NEXT: [[TMP46:%.*]] = add i64 [[INDEX]], [[TMP45]] -; CHECK-UF4-NEXT: [[TMP47:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-UF4-NEXT: [[TMP48:%.*]] = mul nuw i64 [[TMP47]], 6 -; CHECK-UF4-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], [[TMP48]] ; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX]], i64 [[WIDE_TRIP_COUNT]]) ; CHECK-UF4-NEXT: [[TMP53]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 6) ; CHECK-UF4-NEXT: [[TMP52]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 4) ; CHECK-UF4-NEXT: [[TMP51]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 2) ; CHECK-UF4-NEXT: [[TMP50]] = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT12:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP43]], i64 [[WIDE_TRIP_COUNT]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT13:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP46]], i64 [[WIDE_TRIP_COUNT]]) -; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK_NEXT14:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP49]], i64 [[WIDE_TRIP_COUNT]]) ; CHECK-UF4-NEXT: [[TMP54:%.*]] = extractelement <vscale x 2 x i1> [[TMP50]], i32 0 ; CHECK-UF4-NEXT: [[TMP55:%.*]] = xor i1 [[TMP54]], true ; CHECK-UF4-NEXT: br i1 [[TMP55]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll index d73900d..83b494a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/veclib-function-calls.ll @@ -2288,7 +2288,7 @@ define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { } ;. ; CHECK: attributes #[[ATTR0]] = { "target-features"="+v" } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR2]] = { "vector-function-abi-variant"="_ZGVrNxv_acos(Sleef_acosdx_u10rvvm2)" } ; CHECK: attributes #[[ATTR3]] = { "vector-function-abi-variant"="_ZGVrNxv_acosf(Sleef_acosfx_u10rvvm2)" } ; CHECK: attributes #[[ATTR4]] = { "vector-function-abi-variant"="_ZGVrNxv_acosh(Sleef_acoshdx_u10rvvm2)" } diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll index f5329cf..c225ede5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll @@ -580,6 +580,201 @@ exit: ret double %accum } +define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %src, ptr noalias %src.2, ptr noalias %dst) #0 { +; I64-LABEL: define void @loaded_address_used_by_load_through_blend( +; I64-SAME: i64 [[START:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] { +; I64-NEXT: [[ENTRY:.*]]: +; I64-NEXT: br label %[[LOOP_HEADER:.*]] +; I64: [[LOOP_HEADER]]: +; I64-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] +; I64-NEXT: [[IV_2:%.*]] = phi i64 [ [[START]], %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP_LATCH]] ] +; I64-NEXT: [[IV_1:%.*]] = add i64 [[IV]], 1 +; I64-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_1]] +; I64-NEXT: [[L_SRC:%.*]] = load float, ptr [[GEP_SRC]], align 4 +; I64-NEXT: [[C:%.*]] = fcmp oeq float [[L_SRC]], 0.000000e+00 +; I64-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]] +; I64: [[THEN]]: +; I64-NEXT: [[IV_MUL:%.*]] = mul i64 [[IV_1]], [[START]] +; I64-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[IV_MUL]] +; I64-NEXT: br label %[[LOOP_LATCH]] +; I64: [[LOOP_LATCH]]: +; I64-NEXT: [[MERGE_GEP:%.*]] = phi ptr [ [[GEP_SRC_2]], %[[THEN]] ], [ [[SRC_2]], %[[LOOP_HEADER]] ] +; I64-NEXT: [[L_2:%.*]] = load float, ptr [[MERGE_GEP]], align 4 +; I64-NEXT: [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] +; I64-NEXT: store float [[L_2]], ptr [[GEP_DST]], align 4 +; I64-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; I64-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], -1 +; I64-NEXT: [[EC:%.*]] = icmp sgt i64 [[IV_2]], 100 +; I64-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT:.*]] +; I64: [[EXIT]]: +; I64-NEXT: ret void +; +; I32-LABEL: define void @loaded_address_used_by_load_through_blend( +; I32-SAME: i64 [[START:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] { +; I32-NEXT: [[ENTRY:.*:]] +; I32-NEXT: [[TMP0:%.*]] = add i64 [[START]], 1 +; I32-NEXT: [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[START]], i64 100) +; I32-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SMIN]] +; I32-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 8 +; I32-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; I32: [[VECTOR_PH]]: +; I32-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 8 +; I32-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; I32-NEXT: [[TMP2:%.*]] = sub i64 [[START]], [[N_VEC]] +; I32-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[START]], i64 0 +; I32-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer +; I32-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x ptr> poison, ptr [[SRC_2]], i64 0 +; I32-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT1]], <8 x ptr> poison, <8 x i32> zeroinitializer +; I32-NEXT: br label %[[VECTOR_BODY:.*]] +; I32: [[VECTOR_BODY]]: +; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; I32-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 +; I32-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 +; I32-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 +; I32-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 4 +; I32-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 5 +; I32-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 6 +; I32-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 7 +; I32-NEXT: [[TMP11:%.*]] = add i64 [[TMP3]], 1 +; I32-NEXT: [[TMP12:%.*]] = add i64 [[TMP4]], 1 +; I32-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], 1 +; I32-NEXT: [[TMP14:%.*]] = add i64 [[TMP6]], 1 +; I32-NEXT: [[TMP15:%.*]] = add i64 [[TMP7]], 1 +; I32-NEXT: [[TMP16:%.*]] = add i64 [[TMP8]], 1 +; I32-NEXT: [[TMP17:%.*]] = add i64 [[TMP9]], 1 +; I32-NEXT: [[TMP18:%.*]] = add i64 [[TMP10]], 1 +; I32-NEXT: [[TMP19:%.*]] = insertelement <8 x i64> poison, i64 [[TMP11]], i32 0 +; I32-NEXT: [[TMP20:%.*]] = insertelement <8 x i64> [[TMP19]], i64 [[TMP12]], i32 1 +; I32-NEXT: [[TMP21:%.*]] = insertelement <8 x i64> [[TMP20]], i64 [[TMP13]], i32 2 +; I32-NEXT: [[TMP22:%.*]] = insertelement <8 x i64> [[TMP21]], i64 [[TMP14]], i32 3 +; I32-NEXT: [[TMP23:%.*]] = insertelement <8 x i64> [[TMP22]], i64 [[TMP15]], i32 4 +; I32-NEXT: [[TMP24:%.*]] = insertelement <8 x i64> [[TMP23]], i64 [[TMP16]], i32 5 +; I32-NEXT: [[TMP25:%.*]] = insertelement <8 x i64> [[TMP24]], i64 [[TMP17]], i32 6 +; I32-NEXT: [[TMP26:%.*]] = insertelement <8 x i64> [[TMP25]], i64 [[TMP18]], i32 7 +; I32-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP11]] +; I32-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP12]] +; I32-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]] +; I32-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP14]] +; I32-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP15]] +; I32-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP16]] +; I32-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP17]] +; I32-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP18]] +; I32-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP27]], align 4 +; I32-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP28]], align 4 +; I32-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP29]], align 4 +; I32-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP30]], align 4 +; I32-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP31]], align 4 +; I32-NEXT: [[TMP40:%.*]] = load float, ptr [[TMP32]], align 4 +; I32-NEXT: [[TMP41:%.*]] = load float, ptr [[TMP33]], align 4 +; I32-NEXT: [[TMP42:%.*]] = load float, ptr [[TMP34]], align 4 +; I32-NEXT: [[TMP43:%.*]] = insertelement <8 x float> poison, float [[TMP35]], i32 0 +; I32-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP36]], i32 1 +; I32-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP37]], i32 2 +; I32-NEXT: [[TMP46:%.*]] = insertelement <8 x float> [[TMP45]], float [[TMP38]], i32 3 +; I32-NEXT: [[TMP47:%.*]] = insertelement <8 x float> [[TMP46]], float [[TMP39]], i32 4 +; I32-NEXT: [[TMP48:%.*]] = insertelement <8 x float> [[TMP47]], float [[TMP40]], i32 5 +; I32-NEXT: [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[TMP41]], i32 6 +; I32-NEXT: [[TMP50:%.*]] = insertelement <8 x float> [[TMP49]], float [[TMP42]], i32 7 +; I32-NEXT: [[TMP51:%.*]] = fcmp oeq <8 x float> [[TMP50]], zeroinitializer +; I32-NEXT: [[TMP52:%.*]] = mul <8 x i64> [[TMP26]], [[BROADCAST_SPLAT]] +; I32-NEXT: [[TMP53:%.*]] = extractelement <8 x i64> [[TMP52]], i32 0 +; I32-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP53]] +; I32-NEXT: [[TMP55:%.*]] = extractelement <8 x i64> [[TMP52]], i32 1 +; I32-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP55]] +; I32-NEXT: [[TMP57:%.*]] = extractelement <8 x i64> [[TMP52]], i32 2 +; I32-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP57]] +; I32-NEXT: [[TMP59:%.*]] = extractelement <8 x i64> [[TMP52]], i32 3 +; I32-NEXT: [[TMP60:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP59]] +; I32-NEXT: [[TMP61:%.*]] = extractelement <8 x i64> [[TMP52]], i32 4 +; I32-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP61]] +; I32-NEXT: [[TMP63:%.*]] = extractelement <8 x i64> [[TMP52]], i32 5 +; I32-NEXT: [[TMP64:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP63]] +; I32-NEXT: [[TMP65:%.*]] = extractelement <8 x i64> [[TMP52]], i32 6 +; I32-NEXT: [[TMP66:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP65]] +; I32-NEXT: [[TMP67:%.*]] = extractelement <8 x i64> [[TMP52]], i32 7 +; I32-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP67]] +; I32-NEXT: [[TMP69:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP54]], i32 0 +; I32-NEXT: [[TMP70:%.*]] = insertelement <8 x ptr> [[TMP69]], ptr [[TMP56]], i32 1 +; I32-NEXT: [[TMP71:%.*]] = insertelement <8 x ptr> [[TMP70]], ptr [[TMP58]], i32 2 +; I32-NEXT: [[TMP72:%.*]] = insertelement <8 x ptr> [[TMP71]], ptr [[TMP60]], i32 3 +; I32-NEXT: [[TMP73:%.*]] = insertelement <8 x ptr> [[TMP72]], ptr [[TMP62]], i32 4 +; I32-NEXT: [[TMP74:%.*]] = insertelement <8 x ptr> [[TMP73]], ptr [[TMP64]], i32 5 +; I32-NEXT: [[TMP75:%.*]] = insertelement <8 x ptr> [[TMP74]], ptr [[TMP66]], i32 6 +; I32-NEXT: [[TMP76:%.*]] = insertelement <8 x ptr> [[TMP75]], ptr [[TMP68]], i32 7 +; I32-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[TMP51]], <8 x ptr> [[TMP76]], <8 x ptr> [[BROADCAST_SPLAT2]] +; I32-NEXT: [[TMP77:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 0 +; I32-NEXT: [[TMP78:%.*]] = load float, ptr [[TMP77]], align 4 +; I32-NEXT: [[TMP79:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 1 +; I32-NEXT: [[TMP80:%.*]] = load float, ptr [[TMP79]], align 4 +; I32-NEXT: [[TMP81:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 2 +; I32-NEXT: [[TMP82:%.*]] = load float, ptr [[TMP81]], align 4 +; I32-NEXT: [[TMP83:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 3 +; I32-NEXT: [[TMP84:%.*]] = load float, ptr [[TMP83]], align 4 +; I32-NEXT: [[TMP85:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 4 +; I32-NEXT: [[TMP86:%.*]] = load float, ptr [[TMP85]], align 4 +; I32-NEXT: [[TMP87:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 5 +; I32-NEXT: [[TMP88:%.*]] = load float, ptr [[TMP87]], align 4 +; I32-NEXT: [[TMP89:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 6 +; I32-NEXT: [[TMP90:%.*]] = load float, ptr [[TMP89]], align 4 +; I32-NEXT: [[TMP91:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 7 +; I32-NEXT: [[TMP92:%.*]] = load float, ptr [[TMP91]], align 4 +; I32-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] +; I32-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]] +; I32-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]] +; I32-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]] +; I32-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] +; I32-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]] +; I32-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP9]] +; I32-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]] +; I32-NEXT: store float [[TMP78]], ptr [[TMP93]], align 4 +; I32-NEXT: store float [[TMP80]], ptr [[TMP94]], align 4 +; I32-NEXT: store float [[TMP82]], ptr [[TMP95]], align 4 +; I32-NEXT: store float [[TMP84]], ptr [[TMP96]], align 4 +; I32-NEXT: store float [[TMP86]], ptr [[TMP97]], align 4 +; I32-NEXT: store float [[TMP88]], ptr [[TMP98]], align 4 +; I32-NEXT: store float [[TMP90]], ptr [[TMP99]], align 4 +; I32-NEXT: store float [[TMP92]], ptr [[TMP100]], align 4 +; I32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; I32-NEXT: [[TMP101:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; I32-NEXT: br i1 [[TMP101]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; I32: [[MIDDLE_BLOCK]]: +; I32-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; I32-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; I32: [[SCALAR_PH]]: +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %iv.2 = phi i64 [ %start, %entry ], [ %iv.2.next, %loop.latch ] + %iv.1 = add i64 %iv, 1 + %gep.src = getelementptr i8, ptr %src, i64 %iv.1 + %l.src = load float, ptr %gep.src, align 4 + %c = fcmp oeq float %l.src, 0.000000e+00 + br i1 %c, label %then, label %loop.latch + +then: + %iv.mul = mul i64 %iv.1, %start + %gep.src.2 = getelementptr i8, ptr %src.2, i64 %iv.mul + br label %loop.latch + +loop.latch: + %merge.gep = phi ptr [ %gep.src.2, %then ], [ %src.2, %loop.header ] + %l.2 = load float, ptr %merge.gep, align 4 + %gep.dst = getelementptr i8, ptr %dst, i64 %iv + store float %l.2, ptr %gep.dst, align 4 + %iv.next = add i64 %iv, 1 + %iv.2.next = add i64 %iv.2, -1 + %ec = icmp sgt i64 %iv.2, 100 + br i1 %ec, label %loop.header, label %exit + +exit: + ret void +} + +attributes #0 = { "target-cpu"="znver3" } attributes #0 = { "target-cpu"="znver2" } !0 = distinct !{!0, !1} diff --git a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll index af57967..b63ab8f 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-predication.ll @@ -22,7 +22,6 @@ define void @foo(i32 %val, ptr dereferenceable(1024) %ptr) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 256) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP1]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll index 3500c5c..4fd8d17 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -546,19 +546,50 @@ define i64 @loop_guards_needed_to_prove_deref_multiple(i32 %x, i1 %c, ptr derefe ; CHECK-NEXT: call void @llvm.assume(i1 [[PRE_2]]) ; CHECK-NEXT: [[N:%.*]] = add i32 [[SEL]], -1 ; CHECK-NEXT: [[N_EXT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SEL]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 +; CHECK-NEXT: [[IV_NEXT:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP_HEADER]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IV_NEXT]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[LOOP_LATCH:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[IV_NEXT]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: vector.early.exit: +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT]], [[LOOP_LATCH]] ], [ 0, [[PH]] ] +; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[PH]] ] -; CHECK-NEXT: [[GEP_SRC_I:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT1:%.*]], [[LOOP_LATCH1:%.*]] ], [ [[IV]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP_SRC_I:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I]], align 1 ; CHECK-NEXT: [[C_1:%.*]] = icmp eq i8 [[L]], 0 -; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_LOOPEXIT]], label [[LOOP_LATCH1]] ; CHECK: loop.latch: -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N_EXT]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER]] +; CHECK-NEXT: [[IV_NEXT1]] = add i64 [[IV1]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV1]], [[N_EXT]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP_HEADER1]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: exit.loopexit: -; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ [[IV]], [[LOOP_HEADER]] ], [ 0, [[LOOP_LATCH]] ] +; CHECK-NEXT: [[RES_PH:%.*]] = phi i64 [ [[IV1]], [[LOOP_HEADER1]] ], [ 0, [[LOOP_LATCH1]] ], [ 0, [[LOOP_LATCH]] ], [ [[TMP10]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: [[RES:%.*]] = phi i64 [ -1, [[ENTRY:%.*]] ], [ -2, [[THEN]] ], [ [[RES_PH]], [[EXIT_LOOPEXIT]] ] @@ -609,4 +640,6 @@ exit: ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]} ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/NewGVN/ptrtoaddr.ll b/llvm/test/Transforms/NewGVN/ptrtoaddr.ll new file mode 100644 index 0000000..e51b42a --- /dev/null +++ b/llvm/test/Transforms/NewGVN/ptrtoaddr.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=newgvn < %s | FileCheck %s + +define i64 @ptrtoaddr_same(ptr %p) { +; CHECK-LABEL: define i64 @ptrtoaddr_same( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: ret i64 0 +; + %i = ptrtoaddr ptr %p to i64 + %j = ptrtoaddr ptr %p to i64 + %sub = sub i64 %i, %j + ret i64 %sub +} + +; Note that unlike for ptrtoint, it's not possible for ptrtoaddr to differ +; in result type for the same input. +define i64 @ptrtoaddr_different(ptr %p, ptr %p2) { +; CHECK-LABEL: define i64 @ptrtoaddr_different( +; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[I:%.*]] = ptrtoaddr ptr [[P]] to i64 +; CHECK-NEXT: [[J:%.*]] = ptrtoaddr ptr [[P2]] to i64 +; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[I]], [[J]] +; CHECK-NEXT: ret i64 [[SUB]] +; + %i = ptrtoaddr ptr %p to i64 + %j = ptrtoaddr ptr %p2 to i64 + %sub = sub i64 %i, %j + ret i64 %sub +} diff --git a/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll b/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll new file mode 100644 index 0000000..7c9888f --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll @@ -0,0 +1,448 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -O3 < %s | FileCheck %s + +target datalayout = "n64:32:16:8" + +define i8 @test(i8 %x) { +; CHECK-LABEL: define range(i8 0, 53) i8 @test( +; CHECK-SAME: i8 [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[X_:%.*]] = tail call i8 @llvm.umin.i8(i8 [[X]], i8 52) +; CHECK-NEXT: ret i8 [[X_]] +; +start: + %_0 = alloca [1 x i8], align 1 + %0 = icmp eq i8 %x, 0 + br i1 %0, label %bb1, label %bb2 + +bb1: ; preds = %start + store i8 0, ptr %_0, align 1 + br label %bb105 + +bb2: ; preds = %start + %1 = icmp eq i8 %x, 1 + br i1 %1, label %bb3, label %bb4 + +bb105: ; preds = %bb104, %bb103, %bb101, %bb99, %bb97, %bb95, %bb93, %bb91, %bb89, %bb87, %bb85, %bb83, %bb81, %bb79, %bb77, %bb75, %bb73, %bb71, %bb69, %bb67, %bb65, %bb63, %bb61, %bb59, %bb57, %bb55, %bb53, %bb51, %bb49, %bb47, %bb45, %bb43, %bb41, %bb39, %bb37, %bb35, %bb33, %bb31, %bb29, %bb27, %bb25, %bb23, %bb21, %bb19, %bb17, %bb15, %bb13, %bb11, %bb9, %bb7, %bb5, %bb3, %bb1 + %2 = load i8, ptr %_0, align 1 + ret i8 %2 + +bb3: ; preds = %bb2 + store i8 1, ptr %_0, align 1 + br label %bb105 + +bb4: ; preds = %bb2 + %3 = icmp eq i8 %x, 2 + br i1 %3, label %bb5, label %bb6 + +bb5: ; preds = %bb4 + store i8 2, ptr %_0, align 1 + br label %bb105 + +bb6: ; preds = %bb4 + %4 = icmp eq i8 %x, 3 + br i1 %4, label %bb7, label %bb8 + +bb7: ; preds = %bb6 + store i8 3, ptr %_0, align 1 + br label %bb105 + +bb8: ; preds = %bb6 + %5 = icmp eq i8 %x, 4 + br i1 %5, label %bb9, label %bb10 + +bb9: ; preds = %bb8 + store i8 4, ptr %_0, align 1 + br label %bb105 + +bb10: ; preds = %bb8 + %6 = icmp eq i8 %x, 5 + br i1 %6, label %bb11, label %bb12 + +bb11: ; preds = %bb10 + store i8 5, ptr %_0, align 1 + br label %bb105 + +bb12: ; preds = %bb10 + %7 = icmp eq i8 %x, 6 + br i1 %7, label %bb13, label %bb14 + +bb13: ; preds = %bb12 + store i8 6, ptr %_0, align 1 + br label %bb105 + +bb14: ; preds = %bb12 + %8 = icmp eq i8 %x, 7 + br i1 %8, label %bb15, label %bb16 + +bb15: ; preds = %bb14 + store i8 7, ptr %_0, align 1 + br label %bb105 + +bb16: ; preds = %bb14 + %9 = icmp eq i8 %x, 8 + br i1 %9, label %bb17, label %bb18 + +bb17: ; preds = %bb16 + store i8 8, ptr %_0, align 1 + br label %bb105 + +bb18: ; preds = %bb16 + %10 = icmp eq i8 %x, 9 + br i1 %10, label %bb19, label %bb20 + +bb19: ; preds = %bb18 + store i8 9, ptr %_0, align 1 + br label %bb105 + +bb20: ; preds = %bb18 + %11 = icmp eq i8 %x, 10 + br i1 %11, label %bb21, label %bb22 + +bb21: ; preds = %bb20 + store i8 10, ptr %_0, align 1 + br label %bb105 + +bb22: ; preds = %bb20 + %12 = icmp eq i8 %x, 11 + br i1 %12, label %bb23, label %bb24 + +bb23: ; preds = %bb22 + store i8 11, ptr %_0, align 1 + br label %bb105 + +bb24: ; preds = %bb22 + %13 = icmp eq i8 %x, 12 + br i1 %13, label %bb25, label %bb26 + +bb25: ; preds = %bb24 + store i8 12, ptr %_0, align 1 + br label %bb105 + +bb26: ; preds = %bb24 + %14 = icmp eq i8 %x, 13 + br i1 %14, label %bb27, label %bb28 + +bb27: ; preds = %bb26 + store i8 13, ptr %_0, align 1 + br label %bb105 + +bb28: ; preds = %bb26 + %15 = icmp eq i8 %x, 14 + br i1 %15, label %bb29, label %bb30 + +bb29: ; preds = %bb28 + store i8 14, ptr %_0, align 1 + br label %bb105 + +bb30: ; preds = %bb28 + %16 = icmp eq i8 %x, 15 + br i1 %16, label %bb31, label %bb32 + +bb31: ; preds = %bb30 + store i8 15, ptr %_0, align 1 + br label %bb105 + +bb32: ; preds = %bb30 + %17 = icmp eq i8 %x, 16 + br i1 %17, label %bb33, label %bb34 + +bb33: ; preds = %bb32 + store i8 16, ptr %_0, align 1 + br label %bb105 + +bb34: ; preds = %bb32 + %18 = icmp eq i8 %x, 17 + br i1 %18, label %bb35, label %bb36 + +bb35: ; preds = %bb34 + store i8 17, ptr %_0, align 1 + br label %bb105 + +bb36: ; preds = %bb34 + %19 = icmp eq i8 %x, 18 + br i1 %19, label %bb37, label %bb38 + +bb37: ; preds = %bb36 + store i8 18, ptr %_0, align 1 + br label %bb105 + +bb38: ; preds = %bb36 + %20 = icmp eq i8 %x, 19 + br i1 %20, label %bb39, label %bb40 + +bb39: ; preds = %bb38 + store i8 19, ptr %_0, align 1 + br label %bb105 + +bb40: ; preds = %bb38 + %21 = icmp eq i8 %x, 20 + br i1 %21, label %bb41, label %bb42 + +bb41: ; preds = %bb40 + store i8 20, ptr %_0, align 1 + br label %bb105 + +bb42: ; preds = %bb40 + %22 = icmp eq i8 %x, 21 + br i1 %22, label %bb43, label %bb44 + +bb43: ; preds = %bb42 + store i8 21, ptr %_0, align 1 + br label %bb105 + +bb44: ; preds = %bb42 + %23 = icmp eq i8 %x, 22 + br i1 %23, label %bb45, label %bb46 + +bb45: ; preds = %bb44 + store i8 22, ptr %_0, align 1 + br label %bb105 + +bb46: ; preds = %bb44 + %24 = icmp eq i8 %x, 23 + br i1 %24, label %bb47, label %bb48 + +bb47: ; preds = %bb46 + store i8 23, ptr %_0, align 1 + br label %bb105 + +bb48: ; preds = %bb46 + %25 = icmp eq i8 %x, 24 + br i1 %25, label %bb49, label %bb50 + +bb49: ; preds = %bb48 + store i8 24, ptr %_0, align 1 + br label %bb105 + +bb50: ; preds = %bb48 + %26 = icmp eq i8 %x, 25 + br i1 %26, label %bb51, label %bb52 + +bb51: ; preds = %bb50 + store i8 25, ptr %_0, align 1 + br label %bb105 + +bb52: ; preds = %bb50 + %27 = icmp eq i8 %x, 26 + br i1 %27, label %bb53, label %bb54 + +bb53: ; preds = %bb52 + store i8 26, ptr %_0, align 1 + br label %bb105 + +bb54: ; preds = %bb52 + %28 = icmp eq i8 %x, 27 + br i1 %28, label %bb55, label %bb56 + +bb55: ; preds = %bb54 + store i8 27, ptr %_0, align 1 + br label %bb105 + +bb56: ; preds = %bb54 + %29 = icmp eq i8 %x, 28 + br i1 %29, label %bb57, label %bb58 + +bb57: ; preds = %bb56 + store i8 28, ptr %_0, align 1 + br label %bb105 + +bb58: ; preds = %bb56 + %30 = icmp eq i8 %x, 29 + br i1 %30, label %bb59, label %bb60 + +bb59: ; preds = %bb58 + store i8 29, ptr %_0, align 1 + br label %bb105 + +bb60: ; preds = %bb58 + %31 = icmp eq i8 %x, 30 + br i1 %31, label %bb61, label %bb62 + +bb61: ; preds = %bb60 + store i8 30, ptr %_0, align 1 + br label %bb105 + +bb62: ; preds = %bb60 + %32 = icmp eq i8 %x, 31 + br i1 %32, label %bb63, label %bb64 + +bb63: ; preds = %bb62 + store i8 31, ptr %_0, align 1 + br label %bb105 + +bb64: ; preds = %bb62 + %33 = icmp eq i8 %x, 32 + br i1 %33, label %bb65, label %bb66 + +bb65: ; preds = %bb64 + store i8 32, ptr %_0, align 1 + br label %bb105 + +bb66: ; preds = %bb64 + %34 = icmp eq i8 %x, 33 + br i1 %34, label %bb67, label %bb68 + +bb67: ; preds = %bb66 + store i8 33, ptr %_0, align 1 + br label %bb105 + +bb68: ; preds = %bb66 + %35 = icmp eq i8 %x, 34 + br i1 %35, label %bb69, label %bb70 + +bb69: ; preds = %bb68 + store i8 34, ptr %_0, align 1 + br label %bb105 + +bb70: ; preds = %bb68 + %36 = icmp eq i8 %x, 35 + br i1 %36, label %bb71, label %bb72 + +bb71: ; preds = %bb70 + store i8 35, ptr %_0, align 1 + br label %bb105 + +bb72: ; preds = %bb70 + %37 = icmp eq i8 %x, 36 + br i1 %37, label %bb73, label %bb74 + +bb73: ; preds = %bb72 + store i8 36, ptr %_0, align 1 + br label %bb105 + +bb74: ; preds = %bb72 + %38 = icmp eq i8 %x, 37 + br i1 %38, label %bb75, label %bb76 + +bb75: ; preds = %bb74 + store i8 37, ptr %_0, align 1 + br label %bb105 + +bb76: ; preds = %bb74 + %39 = icmp eq i8 %x, 38 + br i1 %39, label %bb77, label %bb78 + +bb77: ; preds = %bb76 + store i8 38, ptr %_0, align 1 + br label %bb105 + +bb78: ; preds = %bb76 + %40 = icmp eq i8 %x, 39 + br i1 %40, label %bb79, label %bb80 + +bb79: ; preds = %bb78 + store i8 39, ptr %_0, align 1 + br label %bb105 + +bb80: ; preds = %bb78 + %41 = icmp eq i8 %x, 40 + br i1 %41, label %bb81, label %bb82 + +bb81: ; preds = %bb80 + store i8 40, ptr %_0, align 1 + br label %bb105 + +bb82: ; preds = %bb80 + %42 = icmp eq i8 %x, 41 + br i1 %42, label %bb83, label %bb84 + +bb83: ; preds = %bb82 + store i8 41, ptr %_0, align 1 + br label %bb105 + +bb84: ; preds = %bb82 + %43 = icmp eq i8 %x, 42 + br i1 %43, label %bb85, label %bb86 + +bb85: ; preds = %bb84 + store i8 42, ptr %_0, align 1 + br label %bb105 + +bb86: ; preds = %bb84 + %44 = icmp eq i8 %x, 43 + br i1 %44, label %bb87, label %bb88 + +bb87: ; preds = %bb86 + store i8 43, ptr %_0, align 1 + br label %bb105 + +bb88: ; preds = %bb86 + %45 = icmp eq i8 %x, 44 + br i1 %45, label %bb89, label %bb90 + +bb89: ; preds = %bb88 + store i8 44, ptr %_0, align 1 + br label %bb105 + +bb90: ; preds = %bb88 + %46 = icmp eq i8 %x, 45 + br i1 %46, label %bb91, label %bb92 + +bb91: ; preds = %bb90 + store i8 45, ptr %_0, align 1 + br label %bb105 + +bb92: ; preds = %bb90 + %47 = icmp eq i8 %x, 46 + br i1 %47, label %bb93, label %bb94 + +bb93: ; preds = %bb92 + store i8 46, ptr %_0, align 1 + br label %bb105 + +bb94: ; preds = %bb92 + %48 = icmp eq i8 %x, 47 + br i1 %48, label %bb95, label %bb96 + +bb95: ; preds = %bb94 + store i8 47, ptr %_0, align 1 + br label %bb105 + +bb96: ; preds = %bb94 + %49 = icmp eq i8 %x, 48 + br i1 %49, label %bb97, label %bb98 + +bb97: ; preds = %bb96 + store i8 48, ptr %_0, align 1 + br label %bb105 + +bb98: ; preds = %bb96 + %50 = icmp eq i8 %x, 49 + br i1 %50, label %bb99, label %bb100 + +bb99: ; preds = %bb98 + store i8 49, ptr %_0, align 1 + br label %bb105 + +bb100: ; preds = %bb98 + %51 = icmp eq i8 %x, 50 + br i1 %51, label %bb101, label %bb102 + +bb101: ; preds = %bb100 + store i8 50, ptr %_0, align 1 + br label %bb105 + +bb102: ; preds = %bb100 + %52 = icmp eq i8 %x, 51 + br i1 %52, label %bb103, label %bb104 + +bb103: ; preds = %bb102 + store i8 51, ptr %_0, align 1 + br label %bb105 + +bb104: ; preds = %bb102 + store i8 52, ptr %_0, align 1 + br label %bb105 +} + +; Make sure the call is inlined. +define i8 @test2(i8 %x) { +; CHECK-LABEL: define range(i8 0, 53) i8 @test2( +; CHECK-SAME: i8 [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: [[CALL:%.*]] = tail call range(i8 0, 53) i8 @llvm.umin.i8(i8 [[X]], i8 52) +; CHECK-NEXT: ret i8 [[CALL]] +; + %call = call i8 @test(i8 %x) + ret i8 %call +} diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll index 9acc6d6..09f583f 100644 --- a/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll +++ b/llvm/test/Transforms/PreISelIntrinsicLowering/AArch64/expand-exp.ll @@ -39,5 +39,4 @@ declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0 declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>) #0 ; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; CHECK-NEXT: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll index cf62fd5..a8880274 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll @@ -4,21 +4,14 @@ define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: @test_add_sdiv( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 2 -; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3 -; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP2_2]], align 4 -; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP2_3]], align 4 -; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42 -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1> ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]] -; CHECK-NEXT: [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3> +; CHECK-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP0]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]] ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR3:%.*]], align 4 ; CHECK-NEXT: ret void @@ -58,21 +51,14 @@ entry: define void @test_add_udiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: @test_add_udiv( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2 -; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3 -; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4 -; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4 -; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42 -; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1> ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]] -; CHECK-NEXT: [[RES2:%.*]] = udiv i32 [[V2]], [[Y2]] -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3> +; CHECK-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]] ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll b/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll index 9bbe3eb..42d3dcc 100644 --- a/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll +++ b/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll @@ -97,8 +97,8 @@ if.end: ret ptr %x.0 } -!0 = !{!"int"} -!1 = !{!"char[4]"} +!0 = !{!"int", i1 0} +!1 = !{!"char[4]", i1 0} ;. -; CHECK: [[META0]] = !{!"int"} +; CHECK: [[META0]] = !{!"int", i1 false} ;. diff --git a/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll b/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll index c9063d3..25267dc 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -passes='simplifycfg' < %s | FileCheck %s --check-prefix=OPTNOLUT +; RUN: opt -S -passes='simplifycfg<switch-to-arithmetic>' < %s | FileCheck %s --check-prefix=OPTNOLUT ; RUN: %if amdgpu-registered-target %{ opt -mtriple=amdgcn--amdpal -S -passes='simplifycfg<switch-to-lookup>' < %s | FileCheck %s --check-prefix=TTINOLUT %} ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -7,23 +7,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define i32 @linear_transform_with_default(i32 %x) { ; OPTNOLUT-LABEL: define i32 @linear_transform_with_default( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[END:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[CASE0:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[CASE1:.*]] -; OPTNOLUT-NEXT: i32 2, label %[[CASE2:.*]] -; OPTNOLUT-NEXT: i32 3, label %[[CASE3:.*]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[CASE0]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE1]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE2]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE3]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[IDX:%.*]] = phi i32 [ 1, %[[CASE0]] ], [ 4, %[[CASE1]] ], [ 7, %[[CASE2]] ], [ 10, %[[CASE3]] ], [ 13, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 4 +; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[X]], 3 +; OPTNOLUT-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 1 +; OPTNOLUT-NEXT: [[IDX:%.*]] = select i1 [[TMP0]], i32 [[SWITCH_OFFSET]], i32 13 ; OPTNOLUT-NEXT: ret i32 [[IDX]] ; ; TTINOLUT-LABEL: define i32 @linear_transform_with_default( @@ -138,26 +126,8 @@ end: define i32 @linear_transform_no_default(i32 %x) { ; OPTNOLUT-LABEL: define i32 @linear_transform_no_default( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[END:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[CASE1:.*]] -; OPTNOLUT-NEXT: i32 2, label %[[CASE2:.*]] -; OPTNOLUT-NEXT: i32 3, label %[[CASE3:.*]] -; OPTNOLUT-NEXT: i32 4, label %[[CASE4:.*]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[CASE1]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE2]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE3]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE4]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[DEFAULT]]: -; OPTNOLUT-NEXT: unreachable -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = phi i32 [ 3, %[[CASE1]] ], [ 6, %[[CASE2]] ], [ 9, %[[CASE3]] ], [ 12, %[[CASE4]] ], [ 0, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[X]], 3 ; OPTNOLUT-NEXT: ret i32 [[SWITCH_IDX_MULT]] ; ; TTINOLUT-LABEL: define i32 @linear_transform_no_default( @@ -350,18 +320,9 @@ end: define i32 @single_value_withdefault(i32 %x) { ; OPTNOLUT-LABEL: define i32 @single_value_withdefault( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[END:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[END]] -; OPTNOLUT-NEXT: i32 2, label %[[END]] -; OPTNOLUT-NEXT: i32 3, label %[[END]] -; OPTNOLUT-NEXT: i32 4, label %[[END]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[DEFAULT]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[DOT:%.*]] = phi i32 [ 3, %[[DEFAULT]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 5 +; OPTNOLUT-NEXT: [[DOT:%.*]] = select i1 [[TMP0]], i32 2, i32 3 ; OPTNOLUT-NEXT: ret i32 [[DOT]] ; ; TTINOLUT-LABEL: define i32 @single_value_withdefault( @@ -401,18 +362,9 @@ end: define i32 @single_value_no_jump_tables(i32 %x) "no-jump-tables"="true" { ; OPTNOLUT-LABEL: define i32 @single_value_no_jump_tables( ; OPTNOLUT-SAME: i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[END:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[END]] -; OPTNOLUT-NEXT: i32 2, label %[[END]] -; OPTNOLUT-NEXT: i32 3, label %[[END]] -; OPTNOLUT-NEXT: i32 4, label %[[END]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[DEFAULT]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[IDX:%.*]] = phi i32 [ 3, %[[DEFAULT]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 5 +; OPTNOLUT-NEXT: [[IDX:%.*]] = select i1 [[TMP0]], i32 2, i32 3 ; OPTNOLUT-NEXT: ret i32 [[IDX]] ; ; TTINOLUT-LABEL: define i32 @single_value_no_jump_tables( @@ -449,6 +401,60 @@ end: ret i32 %idx } +define i1 @single_value_with_mask(i32 %x) { +; OPTNOLUT-LABEL: define i1 @single_value_with_mask( +; OPTNOLUT-SAME: i32 [[X:%.*]]) { +; OPTNOLUT-NEXT: [[ENTRY:.*]]: +; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ +; OPTNOLUT-NEXT: i32 18, label %[[END:.*]] +; OPTNOLUT-NEXT: i32 21, label %[[END]] +; OPTNOLUT-NEXT: i32 48, label %[[END]] +; OPTNOLUT-NEXT: i32 16, label %[[END]] +; OPTNOLUT-NEXT: ] +; OPTNOLUT: [[DEFAULT]]: +; OPTNOLUT-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80 +; OPTNOLUT-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true +; OPTNOLUT-NEXT: br label %[[END]] +; OPTNOLUT: [[END]]: +; OPTNOLUT-NEXT: [[RES:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ [[SEL]], %[[DEFAULT]] ] +; OPTNOLUT-NEXT: ret i1 [[RES]] +; +; TTINOLUT-LABEL: define i1 @single_value_with_mask( +; TTINOLUT-SAME: i32 [[X:%.*]]) { +; TTINOLUT-NEXT: [[ENTRY:.*]]: +; TTINOLUT-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i32 [[X]], 16 +; TTINOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 33 +; TTINOLUT-NEXT: [[SWITCH_MASKINDEX:%.*]] = zext i32 [[SWITCH_TABLEIDX]] to i64 +; TTINOLUT-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i64 4294967333, [[SWITCH_MASKINDEX]] +; TTINOLUT-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i64 [[SWITCH_SHIFTED]] to i1 +; TTINOLUT-NEXT: [[OR_COND:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_LOBIT]], i1 false +; TTINOLUT-NEXT: br i1 [[OR_COND]], label %[[END:.*]], label %[[DEFAULT:.*]] +; TTINOLUT: [[DEFAULT]]: +; TTINOLUT-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80 +; TTINOLUT-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true +; TTINOLUT-NEXT: br label %[[END]] +; TTINOLUT: [[END]]: +; TTINOLUT-NEXT: [[RES:%.*]] = phi i1 [ [[SEL]], %[[DEFAULT]] ], [ false, %[[ENTRY]] ] +; TTINOLUT-NEXT: ret i1 [[RES]] +; +entry: + switch i32 %x, label %default [ + i32 18, label %end + i32 21, label %end + i32 48, label %end + i32 16, label %end + ] + +default: + %cmp = icmp eq i32 %x, 80 + %sel = select i1 %cmp, i1 false, i1 true + br label %end + +end: + %res = phi i1 [ false, %entry ], [ false, %entry ], [ false, %entry ], [ false, %entry ], [ %sel, %default ] + ret i1 %res +} + define i32 @lookup_table(i32 %x) { ; OPTNOLUT-LABEL: define i32 @lookup_table( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { diff --git a/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s b/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s index da83c54..5325177 100644 --- a/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s +++ b/llvm/test/tools/llvm-exegesis/AArch64/no-aliasing-ld-str.s @@ -1,10 +1,10 @@ REQUIRES: aarch64-registered-target -// Flakey on SVE buildbots, disabled pending invesgitation. -UNSUPPORTED: target={{.*}} RUN: llvm-exegesis -mtriple=aarch64 -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%t.obj --opcode-name=FMOVWSr --benchmark-phase=assemble-measured-code 2>&1 RUN: llvm-objdump -d %t.obj > %t.s RUN: FileCheck %s < %t.s +// Start matching after the printed file path, as that may contain something that looks like a mnemonic. +CHECK: Disassembly of section .text: CHECK-NOT: ld{{[1-4]}} CHECK-NOT: st{{[1-4]}} diff --git a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s index bdc02d4..a540d7d 100644 --- a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s +++ b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s @@ -4,7 +4,7 @@ AMOAND_D: --- AMOAND_D-NEXT: mode: latency AMOAND_D-NEXT: key: AMOAND_D-NEXT: instructions: -AMOAND_D-NEXT: - 'AMOAND_D [[RE01:X[0-9]+]] X10 [[RE01:X[0-9]+]]' +AMOAND_D-NEXT: - 'AMOAND_D [[RE01:X[0-9]+]] [[RE01:X[0-9]+]] X10' AMOAND_D-NEXT: config: '' AMOAND_D-NEXT: register_initial_values: AMOAND_D-NEXT: - '[[RE01:X[0-9]+]]=0x0' @@ -16,7 +16,7 @@ AMOADD_W: --- AMOADD_W-NEXT: mode: latency AMOADD_W-NEXT: key: AMOADD_W-NEXT: instructions: -AMOADD_W-NEXT: - 'AMOADD_W [[RE02:X[0-9]+]] X10 [[RE02:X[0-9]+]]' +AMOADD_W-NEXT: - 'AMOADD_W [[RE02:X[0-9]+]] [[RE02:X[0-9]+]] X10' AMOADD_W-NEXT: config: '' AMOADD_W-NEXT: register_initial_values: AMOADD_W-NEXT: - '[[RE02:X[0-9]+]]=0x0' @@ -28,7 +28,7 @@ AMOMAXU_D: --- AMOMAXU_D-NEXT: mode: latency AMOMAXU_D-NEXT: key: AMOMAXU_D-NEXT: instructions: -AMOMAXU_D-NEXT: - 'AMOMAXU_D [[RE03:X[0-9]+]] X10 [[RE03:X[0-9]+]]' +AMOMAXU_D-NEXT: - 'AMOMAXU_D [[RE03:X[0-9]+]] [[RE03:X[0-9]+]] X10' AMOMAXU_D-NEXT: config: '' AMOMAXU_D-NEXT: register_initial_values: AMOMAXU_D-NEXT: - '[[RE03:X[0-9]+]]=0x0' @@ -40,7 +40,7 @@ AMOMIN_W: --- AMOMIN_W-NEXT: mode: latency AMOMIN_W-NEXT: key: AMOMIN_W-NEXT: instructions: -AMOMIN_W-NEXT: - 'AMOMIN_W [[RE04:X[0-9]+]] X10 [[RE04:X[0-9]+]]' +AMOMIN_W-NEXT: - 'AMOMIN_W [[RE04:X[0-9]+]] [[RE04:X[0-9]+]] X10' AMOMIN_W-NEXT: config: '' AMOMIN_W-NEXT: register_initial_values: AMOMIN_W-NEXT: - '[[RE04:X[0-9]+]]=0x0' @@ -52,7 +52,7 @@ AMOXOR_D: --- AMOXOR_D-NEXT: mode: latency AMOXOR_D-NEXT: key: AMOXOR_D-NEXT: instructions: -AMOXOR_D-NEXT: - 'AMOXOR_D [[RE05:X[0-9]+]] X10 [[RE05:X[0-9]+]]' +AMOXOR_D-NEXT: - 'AMOXOR_D [[RE05:X[0-9]+]] [[RE05:X[0-9]+]] X10' AMOXOR_D-NEXT: config: '' AMOXOR_D-NEXT: register_initial_values: AMOXOR_D-NEXT: - '[[RE05:X[0-9]+]]=0x0' diff --git a/llvm/test/tools/llvm-profgen/Inputs/coff-profile.exe b/llvm/test/tools/llvm-profgen/Inputs/coff-profile.exe Binary files differindex 309476a..a4c36a3 100644 --- a/llvm/test/tools/llvm-profgen/Inputs/coff-profile.exe +++ b/llvm/test/tools/llvm-profgen/Inputs/coff-profile.exe diff --git a/llvm/test/tools/llvm-profgen/Inputs/coff-profile.perfscript b/llvm/test/tools/llvm-profgen/Inputs/coff-profile.perfscript index ec5c8ff..29a8803 100644 --- a/llvm/test/tools/llvm-profgen/Inputs/coff-profile.perfscript +++ b/llvm/test/tools/llvm-profgen/Inputs/coff-profile.perfscript @@ -1,13 +1,13 @@ PERF_RECORD_MMAP2 5752/0: [0x7ff70a1b0000(0x640000) @ 0x1000 00:00 0 0]: r-xp c:\Users\haohaiwe\Desktop\coff-profile.exe - 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 - 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 - 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 - 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 - 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 - 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 - 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 - 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 - 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 - 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 - 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 - 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 + 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 + 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 + 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 + 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 + 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 + 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 + 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 + 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 + 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 + 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 + 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 + 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 diff --git a/llvm/test/tools/llvm-profgen/coff-profile.test b/llvm/test/tools/llvm-profgen/coff-profile.test index 5578f73..6411642 100644 --- a/llvm/test/tools/llvm-profgen/coff-profile.test +++ b/llvm/test/tools/llvm-profgen/coff-profile.test @@ -1,37 +1,77 @@ +; RUN: llvm-profgen --format=text --use-dwarf-correlation --perfscript=%S/Inputs/coff-profile.perfscript --binary=%S/Inputs/coff-profile.exe --output=%t +; RUN: FileCheck %s --input-file %t --check-prefix=DWARF ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/coff-profile.perfscript --binary=%S/Inputs/coff-profile.exe --output=%t -; RUN: FileCheck %s --input-file %t +; RUN: FileCheck %s --input-file %t --check-prefix=PROBE -CHECK: main:31837:0 -CHECK-NEXT: 0: 0 -CHECK-NEXT: 3.1: 0 -CHECK-NEXT: 3.2: 0 -CHECK-NEXT: 8: 0 -CHECK-NEXT: 65501: 0 -CHECK-NEXT: 1: ??$init@HG@MyNameSpace2@@YAXHPEAG@Z:0 -CHECK-NEXT: 1: 0 -CHECK-NEXT: 1.1: 0 -CHECK-NEXT: 1.2: 0 -CHECK-NEXT: 2: 0 -CHECK-NEXT: 65514: 0 -CHECK-NEXT: 4: ?work1@?$MyClass@GH@MyNameSpace1@@QEAAXQEAGH@Z:3193 -CHECK-NEXT: 0: ?work@?$MyClass@GH@MyNameSpace1@@AEAAXQEAGHH@Z:3193 -CHECK-NEXT: 1.1: 31 -CHECK-NEXT: 1.2: 31 -CHECK-NEXT: 2: 31 -CHECK-NEXT: 3: 31 -CHECK-NEXT: 65530: 0 -CHECK-NEXT: 5: ?work2@?$MyClass@GH@MyNameSpace1@@QEAAXQEAGH@Z:28644 -CHECK-NEXT: 0: ?work@?$MyClass@GH@MyNameSpace1@@AEAAXQEAGHH@Z:28644 -CHECK-NEXT: 1.1: 341 -CHECK-NEXT: 1.2: 341 -CHECK-NEXT: 2: 341 -CHECK-NEXT: 3: 341 -CHECK-NEXT: 65530: 0 -CHECK-NEXT: 7: ?print@MyNameSpace2@@YAXPEAGH@Z:0 -CHECK-NEXT: 1: 0 +DWARF: main:31341:0 +DWARF-NEXT: 0: 0 +DWARF-NEXT: 3: 0 +DWARF-NEXT: 3.1: 0 +DWARF-NEXT: 3.2: 0 +DWARF-NEXT: 8: 0 +DWARF-NEXT: 65501: 0 +DWARF-NEXT: 1: ??$init@HG@MyNameSpace2@@YAXHPEAG@Z:0 +DWARF-NEXT: 1: 0 +DWARF-NEXT: 1.1: 0 +DWARF-NEXT: 1.2: 0 +DWARF-NEXT: 2: 0 +DWARF-NEXT: 65514: 0 +DWARF-NEXT: 4: ?work1@?$MyClass@GH@MyNameSpace1@@QEAAXQEAGH@Z:3038 +DWARF-NEXT: 0: ?work@?$MyClass@GH@MyNameSpace1@@AEAAXQEAGHH@Z:3038 +DWARF-NEXT: 1.1: 31 +DWARF-NEXT: 1.2: 31 +DWARF-NEXT: 2: 31 +DWARF-NEXT: 3: 31 +DWARF-NEXT: 5: ?work2@?$MyClass@GH@MyNameSpace1@@QEAAXQEAGH@Z:28303 +DWARF-NEXT: 0: ?work@?$MyClass@GH@MyNameSpace1@@AEAAXQEAGHH@Z:28303 +DWARF-NEXT: 1.1: 341 +DWARF-NEXT: 1.2: 341 +DWARF-NEXT: 2: 341 +DWARF-NEXT: 3: 341 +DWARF-NEXT: 7: ?print@MyNameSpace2@@YAXPEAGH@Z:0 +DWARF-NEXT: 1: 0 + +PROBE: main:1116:0 +PROBE-NEXT: 1: 0 +PROBE-NEXT: 3: 0 +PROBE-NEXT: 4: 0 +PROBE-NEXT: 5: 0 +PROBE-NEXT: 8: 0 +PROBE-NEXT: 9: 0 +PROBE-NEXT: 2: ??$init@HG@MyNameSpace2@@YAXHPEAG@Z:0 +PROBE-NEXT: 1: 0 +PROBE-NEXT: 2: 0 +PROBE-NEXT: 3: 0 +PROBE-NEXT: 4: 0 +PROBE-NEXT: 5: 0 +PROBE-NEXT: 6: 0 +PROBE-NEXT: !CFGChecksum: 107105011060 +PROBE-NEXT: 6: ?work1@?$MyClass@GH@MyNameSpace1@@QEAAXQEAGH@Z:93 +PROBE-NEXT: 1: 0 +PROBE-NEXT: 2: ?work@?$MyClass@GH@MyNameSpace1@@AEAAXQEAGHH@Z:93 +PROBE-NEXT: 1: 0 +PROBE-NEXT: 2: 31 +PROBE-NEXT: 4: 31 +PROBE-NEXT: 5: 31 +PROBE-NEXT: !CFGChecksum: 107105011060 +PROBE-NEXT: !CFGChecksum: 281479271677951 +PROBE-NEXT: 7: ?work2@?$MyClass@GH@MyNameSpace1@@QEAAXQEAGH@Z:1023 +PROBE-NEXT: 2: ?work@?$MyClass@GH@MyNameSpace1@@AEAAXQEAGHH@Z:1023 +PROBE-NEXT: 2: 341 +PROBE-NEXT: 3: 0 +PROBE-NEXT: 4: 341 +PROBE-NEXT: 5: 341 +PROBE-NEXT: 6: 0 +PROBE-NEXT: !CFGChecksum: 107105011060 +PROBE-NEXT: !CFGChecksum: 281479271677951 +PROBE-NEXT: 10: ?print@MyNameSpace2@@YAXPEAGH@Z:0 +PROBE-NEXT: 1: 0 +PROBE-NEXT: 2: 0 +PROBE-NEXT: !CFGChecksum: 281479271677951 +PROBE-NEXT: !CFGChecksum: 1126005794311845 ; Original code -; clang-cl.exe -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld -Xclang -fdebug-info-for-profiling -link -debug:dwarf +; clang-cl.exe -O2 -gdwarf -gline-tables-only -fpseudo-probe-for-profiling coff-profile.cpp -fuse-ld=lld -Xclang -fdebug-info-for-profiling -link -debug:dwarf #include <stdio.h> |