33 files changed, 3313 insertions, 771 deletions
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
index b1fe7b1..7ba422d 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll
@@ -615,22 +615,14 @@ define void @test_ptrs_aligned_by_4_via_assumption(ptr %start, ptr %end) {
 ; CHECK-LABEL: 'test_ptrs_aligned_by_4_via_assumption'
 ; CHECK-NEXT:  Classifying expressions for: @test_ptrs_aligned_by_4_via_assumption
 ; CHECK-NEXT:    %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = getelementptr i8, ptr %iv, i64 4
-; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_ptrs_aligned_by_4_via_assumption
-; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
-; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
-; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
-; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
-; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
-; CHECK-NEXT:  Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903
-; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
-; CHECK-NEXT:  Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
-; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT:  Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
+; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 4611686018427387903
+; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
+; CHECK-NEXT:  Loop %loop: Trip multiple is 1
 ;
 entry:
   call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 4) ]
@@ -652,22 +644,14 @@ define void @test_ptrs_aligned_by_8_via_assumption(ptr %start, ptr %end) {
 ; CHECK-LABEL: 'test_ptrs_aligned_by_8_via_assumption'
 ; CHECK-NEXT:  Classifying expressions for: @test_ptrs_aligned_by_8_via_assumption
 ; CHECK-NEXT:    %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = getelementptr i8, ptr %iv, i64 4
-; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_ptrs_aligned_by_8_via_assumption
-; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
-; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
-; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
-; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
-; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
-; CHECK-NEXT:  Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903
-; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
-; CHECK-NEXT:  Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
-; CHECK-NEXT:   Predicates:
-; CHECK-NEXT:      Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0
+; CHECK-NEXT:  Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
+; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 4611686018427387903
+; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
+; CHECK-NEXT:  Loop %loop: Trip multiple is 1
 ;
 entry:
   call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 8) ]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index d721b73c..896603d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -70,12 +70,12 @@
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 #
-# DEBUG-NEXT: G_ABDS (opcode 65): 1 type index, 0 imm indices
+# DEBUG-NEXT: G_ABDS (opcode [[G_ABDS:[0-9]+]]): 1 type index, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 #
-# DEBUG-NEXT: G_ABDU (opcode 66): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: G_ABDU (opcode [[G_ABDU:[0-9]+]]): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode [[G_ABDU]] is aliased to [[G_ABDS]]
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 #
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
index 2ad6e68..f730199 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
@@ -70,7 +70,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def() {
 
 define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call i64 asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -118,7 +118,7 @@ define amdgpu_kernel void @kernel_uses_asm_physreg() {
 
 define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_tuple(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -154,7 +154,7 @@ define void @func_uses_asm_physreg_agpr() {
 
 define void @func_uses_asm_physreg_agpr_tuple() {
 ; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -168,7 +168,7 @@ declare void @unknown()
 
 define amdgpu_kernel void @kernel_calls_extern() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
 ; CHECK-NEXT:    call void @unknown()
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -180,8 +180,8 @@ define amdgpu_kernel void @kernel_calls_extern() {
 
 define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT:    call void @unknown() #[[ATTR10:[0-9]+]]
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT:    call void @unknown() #[[ATTR29:[0-9]+]]
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
@@ -192,7 +192,7 @@ define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
 
 define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
 ; CHECK-NEXT:    call void [[INDIRECT]]()
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -204,8 +204,8 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
 
 define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    call void [[INDIRECT]]() #[[ATTR10]]
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    call void [[INDIRECT]]() #[[ATTR29]]
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
@@ -316,7 +316,7 @@ define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
 
 define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
 ; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
-; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
@@ -342,7 +342,7 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -354,7 +354,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR5:[0-9]+]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -378,7 +378,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() {
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -390,7 +390,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call ptr asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -402,7 +402,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR5]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call <2 x ptr> asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -414,7 +414,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
 
 define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR6:[0-9]+]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -426,7 +426,7 @@ define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
 
 define amdgpu_kernel void @kernel_uses_asm_clobber() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR7:[0-9]+]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -438,7 +438,7 @@ define amdgpu_kernel void @kernel_uses_asm_clobber() {
 
 define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR8:[0-9]+]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -450,7 +450,7 @@ define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
 
 define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR9:[0-9]+]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -462,7 +462,7 @@ define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
 
 define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR9]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -474,7 +474,7 @@ define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
 
 define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR9]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -486,7 +486,7 @@ define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR10:[0-9]+]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call <32 x i32> asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -498,7 +498,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR10]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -510,7 +510,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR10]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call <32 x i32> asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -522,7 +522,7 @@ define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
 
 define amdgpu_kernel void @vreg_use_exceeds_register_file() {
 ; CHECK-LABEL: define amdgpu_kernel void @vreg_use_exceeds_register_file(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR9]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -534,7 +534,7 @@ define amdgpu_kernel void @vreg_use_exceeds_register_file() {
 
 define amdgpu_kernel void @vreg_def_exceeds_register_file() {
 ; CHECK-LABEL: define amdgpu_kernel void @vreg_def_exceeds_register_file(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR9]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call <257 x i32> asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -546,7 +546,7 @@ define amdgpu_kernel void @vreg_def_exceeds_register_file() {
 
 define amdgpu_kernel void @multiple() {
 ; CHECK-LABEL: define amdgpu_kernel void @multiple(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR10]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call { <16 x i32>, <8 x i32>, <8 x i32> } asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -558,7 +558,7 @@ define amdgpu_kernel void @multiple() {
 
 define amdgpu_kernel void @earlyclobber_0() {
 ; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_0(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR11:[0-9]+]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call <8 x i32> asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -570,7 +570,7 @@ define amdgpu_kernel void @earlyclobber_0() {
 
 define amdgpu_kernel void @earlyclobber_1() {
 ; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_1(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR12:[0-9]+]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call { <8 x i32>, <16 x i32> } asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -582,7 +582,7 @@ define amdgpu_kernel void @earlyclobber_1() {
 
 define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512() {
 ; CHECK-LABEL: define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR13:[0-9]+]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -594,7 +594,7 @@ define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512() {
 
 define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512() {
 ; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR13]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <8 x i32>, <16 x i32> } asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -606,7 +606,7 @@ define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512() {
 
 define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
 ; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR14:[0-9]+]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <16 x i32> } asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -618,7 +618,7 @@ define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
 
 define amdgpu_kernel void @mixed_physreg_vreg_tuples_0() {
 ; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_0(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR11]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -630,7 +630,7 @@ define amdgpu_kernel void @mixed_physreg_vreg_tuples_0() {
 
 define amdgpu_kernel void @mixed_physreg_vreg_tuples_1() {
 ; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_1(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR15:[0-9]+]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -642,7 +642,7 @@ define amdgpu_kernel void @mixed_physreg_vreg_tuples_1() {
 
 define amdgpu_kernel void @physreg_raises_limit() {
 ; CHECK-LABEL: define amdgpu_kernel void @physreg_raises_limit(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR16:[0-9]+]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -652,10 +652,9 @@ define amdgpu_kernel void @physreg_raises_limit() {
   ret void
 }
 
-; FIXME: This should require 9. We cannot allocate an a128 at a0.
 define amdgpu_kernel void @physreg_tuple_alignment_raises_limit() {
 ; CHECK-LABEL: define amdgpu_kernel void @physreg_tuple_alignment_raises_limit(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR11]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -667,7 +666,7 @@ define amdgpu_kernel void @physreg_tuple_alignment_raises_limit() {
 
 define amdgpu_kernel void @align3_virtreg() {
 ; CHECK-LABEL: define amdgpu_kernel void @align3_virtreg(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR6]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -679,7 +678,7 @@ define amdgpu_kernel void @align3_virtreg() {
 
 define amdgpu_kernel void @align3_align4_virtreg() {
 ; CHECK-LABEL: define amdgpu_kernel void @align3_align4_virtreg(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR15]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -691,7 +690,7 @@ define amdgpu_kernel void @align3_align4_virtreg() {
 
 define amdgpu_kernel void @align2_align4_virtreg() {
 ; CHECK-LABEL: define amdgpu_kernel void @align2_align4_virtreg(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR15]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -703,7 +702,7 @@ define amdgpu_kernel void @align2_align4_virtreg() {
 
 define amdgpu_kernel void @kernel_uses_write_register_a55() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55(
-; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR17:[0-9]+]] {
 ; CHECK-NEXT:    call void @llvm.write_register.i32(metadata [[META0:![0-9]+]], i32 0)
 ; CHECK-NEXT:    ret void
 ;
@@ -713,71 +712,313 @@ define amdgpu_kernel void @kernel_uses_write_register_a55() {
 
 define amdgpu_kernel void @kernel_uses_write_register_v55() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55(
-; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @llvm.write_register.i32(metadata [[META1:![0-9]+]], i32 0)
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.write_register.i64(metadata !1, i32 0)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
-; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-SAME: ) #[[ATTR18:[0-9]+]] {
 ; CHECK-NEXT:    call void @llvm.write_register.i96(metadata [[META2:![0-9]+]], i96 0)
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.write_register.i64(metadata !2, i96 0)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR19:[0-9]+]] {
 ; CHECK-NEXT:    [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]])
 ; CHECK-NEXT:    store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %reg = call i32 @llvm.read_register.i64(metadata !0)
   store i32 %reg, ptr addrspace(1) %ptr
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR19]] {
 ; CHECK-NEXT:    [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata [[META0]])
 ; CHECK-NEXT:    store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %reg = call i32 @llvm.read_volatile_register.i64(metadata !0)
   store i32 %reg, ptr addrspace(1) %ptr
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR20:[0-9]+]] {
 ; CHECK-NEXT:    [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata [[META3:![0-9]+]])
 ; CHECK-NEXT:    store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %reg = call i128 @llvm.read_register.i64(metadata !3)
   store i128 %reg, ptr addrspace(1) %ptr
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256(
-; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-SAME: ) #[[ATTR9]] {
 ; CHECK-NEXT:    call void @llvm.write_register.i32(metadata [[META4:![0-9]+]], i32 0)
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.write_register.i64(metadata !4, i32 0)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_multiple_uses() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_multiple_uses(
+; CHECK-SAME: ) #[[ATTR5]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0", "a"(i64 poison)
+  call void asm sideeffect "; use $0", "a"(i32 poison)
+  call void asm sideeffect "; use $0", "a"(i128 poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_multiple_defs() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_multiple_defs(
+; CHECK-SAME: ) #[[ATTR5]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 asm sideeffect "
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 asm sideeffect "
+; CHECK-NEXT:    [[TMP3:%.*]] = call i128 asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call i64 asm sideeffect "; def $0", "=a"()
+  call i32 asm sideeffect "; def $0", "=a"()
+  call i128 asm sideeffect "; def $0", "=a"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_multiple_use_defs() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_multiple_use_defs(
+; CHECK-SAME: ) #[[ATTR5]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    [[TMP1:%.*]] = call i128 asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0", "a"(i32 poison)
+  call i128 asm sideeffect "; def $0", "=a"()
+  call void @use_most()
+  ret void
+}
+
+define void @callgraph_b() {
+; CHECK-LABEL: define void @callgraph_b(
+; CHECK-SAME: ) #[[ATTR15]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> asm sideeffect "
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call <4 x i32> asm sideeffect "; def $0", "=a"()
+  call void asm sideeffect "; use $0", "a"(<8 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define void @callgraph_c() {
+; CHECK-LABEL: define void @callgraph_c(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 asm sideeffect "
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call i32 asm sideeffect "; def $0", "=a"()
+  call void asm sideeffect "; use $0", "a"(<2 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define void @callgraph_a(i1 %cond) {
+; CHECK-LABEL: define void @callgraph_a(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR15]] {
+; CHECK-NEXT:    br i1 [[COND]], label [[A:%.*]], label [[B:%.*]]
+; CHECK:       a:
+; CHECK-NEXT:    call void @callgraph_b()
+; CHECK-NEXT:    ret void
+; CHECK:       b:
+; CHECK-NEXT:    call void @callgraph_c()
+; CHECK-NEXT:    ret void
+;
+  br i1 %cond, label %a, label %b
+
+a:
+  call void @callgraph_b()
+  ret void
+
+b:
+  call void @callgraph_c()
+  ret void
+}
+
+
+define void @kernel_max_callgraph(i1 %cond) {
+; CHECK-LABEL: define void @kernel_max_callgraph(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR15]] {
+; CHECK-NEXT:    call void @callgraph_a(i1 [[COND]])
+; CHECK-NEXT:    ret void
+;
+  call void @callgraph_a(i1 %cond)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_all_virtregs() #1 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_all_virtregs(
+; CHECK-SAME: ) #[[ATTR21:[0-9]+]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0", "a,a,a,a,a,a,a,a"(<32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1() #1 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1(
+; CHECK-SAME: ) #[[ATTR21]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0", "a,a,a,a,a,a,a,a,a"(<32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, i32 poison)
+  call void @use_most()
+  ret void
+}
+
+define void @recursive() {
+; CHECK-LABEL: define void @recursive(
+; CHECK-SAME: ) #[[ATTR22:[0-9]+]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    call void @recursive()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0", "a"(<7 x i32> poison)
+  call void @use_most()
+  call void @recursive()
+  ret void
+}
+
+define void @indirect_0() {
+; CHECK-LABEL: define void @indirect_0(
+; CHECK-SAME: ) #[[ATTR22]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0", "a"(<7 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define void @indirect_1() {
+; CHECK-LABEL: define void @indirect_1(
+; CHECK-SAME: ) #[[ATTR23:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x i32> asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call <3 x i32> asm sideeffect "; def $0", "=a"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @knowable_indirect_call(i1 %cond) {
+; CHECK-LABEL: define amdgpu_kernel void @knowable_indirect_call(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR22]] {
+; CHECK-NEXT:    [[FPTR:%.*]] = select i1 [[COND]], ptr @indirect_0, ptr @indirect_1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @indirect_1
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    call void @indirect_1()
+; CHECK-NEXT:    br label [[TMP6:%.*]]
+; CHECK:       3:
+; CHECK-NEXT:    br i1 true, label [[TMP4:%.*]], label [[TMP5:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @indirect_0()
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       5:
+; CHECK-NEXT:    unreachable
+; CHECK:       6:
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  %fptr = select i1 %cond, ptr @indirect_0, ptr @indirect_1
+  call void %fptr()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @calls_poison(i1 %cond) {
+; CHECK-LABEL: define amdgpu_kernel void @calls_poison(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    call void poison()
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void poison()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @calls_null(i1 %cond) {
+; CHECK-LABEL: define amdgpu_kernel void @calls_null(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    call void null()
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void null()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @indirect_unknown(ptr %fptr) {
+; CHECK-LABEL: define amdgpu_kernel void @indirect_unknown(
+; CHECK-SAME: ptr [[FPTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:    call void [[FPTR]]()
+; CHECK-NEXT:    ret void
+;
+  call void %fptr()
   ret void
 }
 
 attributes #0 = { "amdgpu-agpr-alloc"="0" }
+attributes #1 = { "amdgpu-waves-per-eu"="1,1" }
 
 !0 = !{!"a55"}
 !1 = !{!"v55"}
@@ -787,16 +1028,35 @@ attributes #0 = { "amdgpu-agpr-alloc"="0" }
 
 ;.
 ; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR8:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="1" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="2" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="4" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="6" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="5" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-agpr-alloc"="14" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR9]] = { "amdgpu-agpr-alloc"="256" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="32" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR11]] = { "amdgpu-agpr-alloc"="9" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR12]] = { "amdgpu-agpr-alloc"="64" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR13]] = { "amdgpu-agpr-alloc"="49" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR14]] = { "amdgpu-agpr-alloc"="33" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR15]] = { "amdgpu-agpr-alloc"="8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR16]] = { "amdgpu-agpr-alloc"="13" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="56" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="58" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="56" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR20]] = { "amdgpu-agpr-alloc"="60" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="256" "amdgpu-waves-per-eu"="1,1" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR22]] = { "amdgpu-agpr-alloc"="7" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR23]] = { "amdgpu-agpr-alloc"="3" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR24:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR25:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR26:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR27:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR28:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR29]] = { "amdgpu-agpr-alloc"="0" }
 ;.
 ; CHECK: [[META0]] = !{!"a55"}
 ; CHECK: [[META1]] = !{!"v55"}
diff --git a/llvm/test/CodeGen/AMDGPU/mad_int24.ll b/llvm/test/CodeGen/AMDGPU/mad_int24.ll
index 93fda94..dd88310 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_int24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_int24.ll
@@ -1,17 +1,79 @@
-; RUN: llc < %s -mtriple=amdgcn | FileCheck %s --check-prefix=GCN --check-prefix=FUNC
-; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefix=GCN --check-prefix=FUNC
-; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=amdgcn| FileCheck %s --check-prefixes=GCN
+; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefixes=VI
+; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefixes=EG,R600,RW
+; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefixes=EG,R600,CM
 
-; FUNC-LABEL: {{^}}i32_mad24:
 ; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
-; EG: MULLO_INT
-; CM: MULLO_INT
-; GCN: s_bfe_i32
-; GCN: s_bfe_i32
-; GCN: s_mul_i32
-; GCN: s_add_i32
 define amdgpu_kernel void @i32_mad24(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) {
+; GCN-LABEL: i32_mad24:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0xb
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_bfe_i32 s0, s0, 0x180000
+; GCN-NEXT:    s_bfe_i32 s1, s1, 0x180000
+; GCN-NEXT:    s_mul_i32 s0, s0, s1
+; GCN-NEXT:    s_add_i32 s0, s0, s2
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; VI-LABEL: i32_mad24:
+; VI:       ; %bb.0: ; %entry
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
+; VI-NEXT:    s_mov_b32 s7, 0xf000
+; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_bfe_i32 s0, s0, 0x180000
+; VI-NEXT:    s_bfe_i32 s1, s1, 0x180000
+; VI-NEXT:    s_mul_i32 s0, s0, s1
+; VI-NEXT:    s_add_i32 s0, s0, s2
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_endpgm
+;
+; RW-LABEL: i32_mad24:
+; RW:       ; %bb.0: ; %entry
+; RW-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
+; RW-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; RW-NEXT:    CF_END
+; RW-NEXT:    PAD
+; RW-NEXT:    ALU clause starting at 4:
+; RW-NEXT:     LSHL T0.W, KC0[2].Z, literal.x,
+; RW-NEXT:     LSHL * T1.W, KC0[2].W, literal.x,
+; RW-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT:     ASHR T1.W, PS, literal.x,
+; RW-NEXT:     ASHR * T0.W, PV.W, literal.x,
+; RW-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT:     MULLO_INT * T0.X, PS, PV.W,
+; RW-NEXT:     ADD_INT T0.X, PS, KC0[3].X,
+; RW-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; RW-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: i32_mad24:
+; CM:       ; %bb.0: ; %entry
+; CM-NEXT:    ALU 12, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    PAD
+; CM-NEXT:    ALU clause starting at 4:
+; CM-NEXT:     LSHL T0.Z, KC0[2].Z, literal.x,
+; CM-NEXT:     LSHL * T0.W, KC0[2].W, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     ASHR T1.Z, PV.W, literal.x,
+; CM-NEXT:     ASHR * T0.W, PV.Z, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T0.X, T0.W, T1.Z,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.W, T1.Z,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T0.W, T1.Z,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.W, T1.Z,
+; CM-NEXT:     ADD_INT * T0.X, PV.X, KC0[3].X,
+; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 entry:
   %0 = shl i32 %a, 8
   %a_24 = ashr i32 %0, 8
@@ -23,13 +85,25 @@ entry:
   ret void
 }
 
-; GCN-LABEL: {{^}}mad24_known_bits_destroyed:
-; GCN: s_waitcnt
-; GCN-NEXT: v_mad_i32_i24
-; GCN-NEXT: v_mul_i32_i24
-; GCN-NEXT: s_setpc_b64
 define i32 @mad24_known_bits_destroyed(i32 %a, i32 %b, i32 %c) {
-
+; GCN-LABEL: mad24_known_bits_destroyed:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mad_i32_i24 v1, v0, v1, v2
+; GCN-NEXT:    v_mul_i32_i24_e32 v0, v1, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: mad24_known_bits_destroyed:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_mad_i32_i24 v1, v0, v1, v2
+; VI-NEXT:    v_mul_i32_i24_e32 v0, v1, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; EG-LABEL: mad24_known_bits_destroyed:
+; EG:       ; %bb.0:
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
   %shl.0 = shl i32 %a, 8
   %sra.0 = ashr i32 %shl.0, 8
   %shl.1 = shl i32 %b, 8
@@ -48,12 +122,25 @@ define i32 @mad24_known_bits_destroyed(i32 %a, i32 %b, i32 %c) {
   ret i32 %mul1
 }
 
-; GCN-LABEL: {{^}}mad24_intrin_known_bits_destroyed:
-; GCN: s_waitcnt
-; GCN-NEXT: v_mad_i32_i24
-; GCN-NEXT: v_mul_i32_i24
-; GCN-NEXT: s_setpc_b64
 define i32 @mad24_intrin_known_bits_destroyed(i32 %a, i32 %b, i32 %c) {
+; GCN-LABEL: mad24_intrin_known_bits_destroyed:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mad_i32_i24 v1, v0, v1, v2
+; GCN-NEXT:    v_mul_i32_i24_e32 v0, v1, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: mad24_intrin_known_bits_destroyed:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_mad_i32_i24 v1, v0, v1, v2
+; VI-NEXT:    v_mul_i32_i24_e32 v0, v1, v0
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; EG-LABEL: mad24_intrin_known_bits_destroyed:
+; EG:       ; %bb.0:
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
   %shl.0 = shl i32 %a, 8
   %sra.0 = ashr i32 %shl.0, 8
   %shl.1 = shl i32 %b, 8
@@ -73,17 +160,177 @@ define i32 @mad24_intrin_known_bits_destroyed(i32 %a, i32 %b, i32 %c) {
 }
 
 ; Make sure no unnecessary BFEs are emitted in the loop.
-; GCN-LABEL: {{^}}mad24_destroyed_knownbits_2:
-; GCN-NOT: v_bfe
-; GCN: v_mad_i32_i24
-; GCN-NOT: v_bfe
-; GCN: v_mad_i32_i24
-; GCN-NOT: v_bfe
-; GCN: v_mad_i32_i24
-; GCN-NOT: v_bfe
-; GCN: v_mad_i32_i24
-; GCN-NOT: v_bfe
 define void @mad24_destroyed_knownbits_2(i32 %arg, i32 %arg1, i32 %arg2, ptr addrspace(1) %arg3) {
+; GCN-LABEL: mad24_destroyed_knownbits_2:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v5, 1
+; GCN-NEXT:    s_mov_b64 s[4:5], 0
+; GCN-NEXT:  .LBB3_1: ; %bb6
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    v_mad_i32_i24 v0, v0, v5, v5
+; GCN-NEXT:    v_add_i32_e32 v1, vcc, -1, v1
+; GCN-NEXT:    v_mad_i32_i24 v5, v0, v5, v0
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; GCN-NEXT:    v_mad_i32_i24 v0, v5, v0, v5
+; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT:    v_mad_i32_i24 v0, v0, v5, v0
+; GCN-NEXT:    v_mov_b32_e32 v5, v2
+; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GCN-NEXT:    s_cbranch_execnz .LBB3_1
+; GCN-NEXT:  ; %bb.2: ; %bb5
+; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GCN-NEXT:    s_mov_b32 s6, 0
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s4, s6
+; GCN-NEXT:    s_mov_b32 s5, s6
+; GCN-NEXT:    buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: mad24_destroyed_knownbits_2:
+; VI:       ; %bb.0: ; %bb
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v5, 1
+; VI-NEXT:    s_mov_b64 s[4:5], 0
+; VI-NEXT:  .LBB3_1: ; %bb6
+; VI-NEXT:    ; =>This Inner Loop Header: Depth=1
+; VI-NEXT:    v_mad_i32_i24 v0, v0, v5, v5
+; VI-NEXT:    v_mad_i32_i24 v5, v0, v5, v0
+; VI-NEXT:    v_add_u32_e32 v1, vcc, -1, v1
+; VI-NEXT:    v_mad_i32_i24 v0, v5, v0, v5
+; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT:    v_mad_i32_i24 v0, v0, v5, v0
+; VI-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; VI-NEXT:    v_mov_b32_e32 v5, v2
+; VI-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; VI-NEXT:    s_cbranch_execnz .LBB3_1
+; VI-NEXT:  ; %bb.2: ; %bb5
+; VI-NEXT:    s_or_b64 exec, exec, s[4:5]
+; VI-NEXT:    flat_store_dword v[3:4], v0
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; RW-LABEL: mad24_destroyed_knownbits_2:
+; RW:       ; %bb.0: ; %bb
+; RW-NEXT:    ALU 5, @10, KC0[CB0:0-32], KC1[]
+; RW-NEXT:    LOOP_START_DX10 @7
+; RW-NEXT:    ALU_PUSH_BEFORE 30, @16, KC0[], KC1[]
+; RW-NEXT:    JUMP @6 POP:1
+; RW-NEXT:    LOOP_BREAK @6
+; RW-NEXT:    POP @6 POP:1
+; RW-NEXT:    END_LOOP @2
+; RW-NEXT:    ALU 1, @47, KC0[], KC1[]
+; RW-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; RW-NEXT:    CF_END
+; RW-NEXT:    ALU clause starting at 10:
+; RW-NEXT:     MOV T0.X, KC0[2].Y,
+; RW-NEXT:     MOV T0.Y, KC0[2].Z,
+; RW-NEXT:     MOV * T0.Z, KC0[2].W,
+; RW-NEXT:     MOV T0.W, KC0[3].X,
+; RW-NEXT:     MOV * T1.W, literal.x,
+; RW-NEXT:    1(1.401298e-45), 0(0.000000e+00)
+; RW-NEXT:    ALU clause starting at 16:
+; RW-NEXT:     LSHL T2.W, T1.W, literal.x,
+; RW-NEXT:     LSHL * T3.W, T0.X, literal.x,
+; RW-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT:     ASHR T3.W, PS, literal.x,
+; RW-NEXT:     ASHR * T2.W, PV.W, literal.x,
+; RW-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT:     MULLO_INT * T0.X, PV.W, PS,
+; RW-NEXT:     ADD_INT * T1.W, PS, T1.W,
+; RW-NEXT:     LSHL * T3.W, PV.W, literal.x,
+; RW-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT:     ASHR * T3.W, PV.W, literal.x,
+; RW-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT:     MULLO_INT * T0.X, PV.W, T2.W,
+; RW-NEXT:     ADD_INT * T1.W, PS, T1.W,
+; RW-NEXT:     LSHL * T2.W, PV.W, literal.x,
+; RW-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT:     ASHR * T2.W, PV.W, literal.x,
+; RW-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT:     MULLO_INT * T0.X, PV.W, T3.W,
+; RW-NEXT:     ADD_INT * T1.W, PS, T1.W,
+; RW-NEXT:     LSHL * T3.W, PV.W, literal.x,
+; RW-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT:     ASHR * T3.W, PV.W, literal.x,
+; RW-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT:     ADD_INT T0.Y, T0.Y, literal.x,
+; RW-NEXT:     MULLO_INT * T0.X, PV.W, T2.W,
+; RW-NEXT:    -1(nan), 0(0.000000e+00)
+; RW-NEXT:     ADD_INT T0.X, PS, T1.W,
+; RW-NEXT:     SETE_INT T2.W, PV.Y, 0.0,
+; RW-NEXT:     MOV * T1.W, T0.Z,
+; RW-NEXT:     PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; RW-NEXT:    ALU clause starting at 47:
+; RW-NEXT:     LSHR * T1.X, T0.W, literal.x,
+; RW-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: mad24_destroyed_knownbits_2:
+; CM:       ; %bb.0: ; %bb
+; CM-NEXT:    ALU 5, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    LOOP_START_DX10 @7
+; CM-NEXT:    ALU_PUSH_BEFORE 41, @16, KC0[], KC1[]
+; CM-NEXT:    JUMP @6 POP:1
+; CM-NEXT:    LOOP_BREAK @6
+; CM-NEXT:    POP @6 POP:1
+; CM-NEXT:    END_LOOP @2
+; CM-NEXT:    ALU 1, @58, KC0[], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    ALU clause starting at 10:
+; CM-NEXT:     MOV * T1.X, KC0[2].Y,
+; CM-NEXT:     MOV T0.X, KC0[2].Z,
+; CM-NEXT:     MOV T0.Y, KC0[2].W,
+; CM-NEXT:     MOV T0.Z, KC0[3].X,
+; CM-NEXT:     MOV * T0.W, literal.x,
+; CM-NEXT:    1(1.401298e-45), 0(0.000000e+00)
+; CM-NEXT:    ALU clause starting at 16:
+; CM-NEXT:     LSHL T1.Z, T0.W, literal.x,
+; CM-NEXT:     LSHL * T1.W, T1.X, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     ASHR T2.Z, PV.W, literal.x,
+; CM-NEXT:     ASHR * T1.W, PV.Z, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T1.X, T2.Z, T1.W,
+; CM-NEXT:     MULLO_INT T1.Y (MASKED), T2.Z, T1.W,
+; CM-NEXT:     MULLO_INT T1.Z (MASKED), T2.Z, T1.W,
+; CM-NEXT:     MULLO_INT * T1.W (MASKED), T2.Z, T1.W,
+; CM-NEXT:     ADD_INT * T0.W, PV.X, T0.W,
+; CM-NEXT:     LSHL * T2.W, PV.W, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     ASHR * T2.W, PV.W, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T1.X, T2.W, T1.W,
+; CM-NEXT:     MULLO_INT T1.Y (MASKED), T2.W, T1.W,
+; CM-NEXT:     MULLO_INT T1.Z (MASKED), T2.W, T1.W,
+; CM-NEXT:     MULLO_INT * T1.W (MASKED), T2.W, T1.W,
+; CM-NEXT:     ADD_INT * T0.W, PV.X, T0.W,
+; CM-NEXT:     LSHL * T1.W, PV.W, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     ASHR * T1.W, PV.W, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T1.X, T1.W, T2.W,
+; CM-NEXT:     MULLO_INT T1.Y (MASKED), T1.W, T2.W,
+; CM-NEXT:     MULLO_INT T1.Z (MASKED), T1.W, T2.W,
+; CM-NEXT:     MULLO_INT * T1.W (MASKED), T1.W, T2.W,
+; CM-NEXT:     ADD_INT * T0.W, PV.X, T0.W,
+; CM-NEXT:     LSHL * T2.W, PV.W, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     ADD_INT T0.X, T0.X, literal.x,
+; CM-NEXT:     ASHR * T2.W, PV.W, literal.y,
+; CM-NEXT:    -1(nan), 8(1.121039e-44)
+; CM-NEXT:     MULLO_INT T1.X, T2.W, T1.W,
+; CM-NEXT:     MULLO_INT T1.Y (MASKED), T2.W, T1.W,
+; CM-NEXT:     MULLO_INT T1.Z (MASKED), T2.W, T1.W,
+; CM-NEXT:     MULLO_INT * T1.W (MASKED), T2.W, T1.W,
+; CM-NEXT:     ADD_INT T1.X, PV.X, T0.W,
+; CM-NEXT:     SETE_INT T1.Z, T0.X, 0.0,
+; CM-NEXT:     MOV * T0.W, T0.Y,
+; CM-NEXT:     PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.Z, 0.0,
+; CM-NEXT:    ALU clause starting at 58:
+; CM-NEXT:     LSHR * T0.X, T0.Z, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 bb:
   br label %bb6
 
@@ -119,3 +366,5 @@ bb6:                                              ; preds = %bb6, %bb
 }
 
 declare i32 @llvm.amdgcn.mul.i24(i32, i32)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; R600: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
index a6d458e..46b8df4 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
@@ -1,19 +1,75 @@
-; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -mtriple=amdgcn | FileCheck %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN
-; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2
-; RUN: llc < %s -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefixes=EG
+; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefixes=CM
+; RUN: llc < %s -mtriple=amdgcn | FileCheck %s --check-prefixes=GCN
+; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefixes=GFX8,SI
+; RUN: llc < %s -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global | FileCheck %s --check-prefixes=GFX8,VI
 
 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
-; FUNC-LABEL: {{^}}u32_mad24:
-; EG: MULLO_INT
-; SI: s_mul_i32
-; SI: s_add_i32
-; VI: s_mul_{{[iu]}}32
-; VI: s_add_{{[iu]}}32
-
 define amdgpu_kernel void @u32_mad24(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) {
+; EG-LABEL: u32_mad24:
+; EG:       ; %bb.0: ; %entry
+; EG-NEXT:    ALU 6, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    ALU clause starting at 4:
+; EG-NEXT:     AND_INT T0.W, KC0[2].W, literal.x,
+; EG-NEXT:     AND_INT * T1.W, KC0[2].Z, literal.x,
+; EG-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT:     MULLO_INT * T0.X, PS, PV.W,
+; EG-NEXT:     ADD_INT T0.X, PS, KC0[3].X,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: u32_mad24:
+; CM:       ; %bb.0: ; %entry
+; CM-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    PAD
+; CM-NEXT:    ALU clause starting at 4:
+; CM-NEXT:     AND_INT T0.Z, KC0[2].W, literal.x,
+; CM-NEXT:     AND_INT * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T0.X, T0.W, T0.Z,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.W, T0.Z,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T0.W, T0.Z,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.W, T0.Z,
+; CM-NEXT:     ADD_INT * T0.X, PV.X, KC0[3].X,
+; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: u32_mad24:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0xb
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_and_b32 s0, s0, 0xffffff
+; GCN-NEXT:    s_and_b32 s1, s1, 0xffffff
+; GCN-NEXT:    s_mul_i32 s0, s0, s1
+; GCN-NEXT:    s_add_i32 s0, s0, s2
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; GFX8-LABEL: u32_mad24:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x2c
+; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
+; GFX8-NEXT:    s_mov_b32 s7, 0xf000
+; GFX8-NEXT:    s_mov_b32 s6, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_and_b32 s0, s0, 0xffffff
+; GFX8-NEXT:    s_and_b32 s1, s1, 0xffffff
+; GFX8-NEXT:    s_mul_i32 s0, s0, s1
+; GFX8-NEXT:    s_add_i32 s0, s0, s2
+; GFX8-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX8-NEXT:    s_endpgm
 entry:
   %0 = shl i32 %a, 8
   %a_24 = lshr i32 %0, 8
@@ -25,18 +81,88 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: {{^}}i16_mad24:
 ; The order of A and B does not matter.
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
 ; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 16
-; GCN:	s_mul_i32 [[MUL:s[0-9]]], {{[s][0-9], [s][0-9]}}
-; GCN:	s_add_i32 [[MAD:s[0-9]]], [[MUL]], s{{[0-9]}}
-; GCN:	s_sext_i32_i16 [[EXT:s[0-9]]], [[MAD]]
-; GCN:	v_mov_b32_e32 v0, [[EXT]]
 define amdgpu_kernel void @i16_mad24(ptr addrspace(1) %out, i16 %a, i16 %b, i16 %c) {
+; EG-LABEL: i16_mad24:
+; EG:       ; %bb.0: ; %entry
+; EG-NEXT:    ALU 0, @12, KC0[], KC1[]
+; EG-NEXT:    TEX 2 @6
+; EG-NEXT:    ALU 4, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 40, #3
+; EG-NEXT:     VTX_READ_16 T2.X, T0.X, 42, #3
+; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 44, #3
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     MOV * T0.X, 0.0,
+; EG-NEXT:    ALU clause starting at 13:
+; EG-NEXT:     MULLO_INT * T0.Y, T1.X, T2.X,
+; EG-NEXT:     ADD_INT * T0.W, PS, T0.X,
+; EG-NEXT:     BFE_INT T0.X, PV.W, 0.0, literal.x,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
+;
+; CM-LABEL: i16_mad24:
+; CM:       ; %bb.0: ; %entry
+; CM-NEXT:    ALU 0, @12, KC0[], KC1[]
+; CM-NEXT:    TEX 2 @6
+; CM-NEXT:    ALU 8, @13, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    PAD
+; CM-NEXT:    Fetch clause starting at 6:
+; CM-NEXT:     VTX_READ_16 T1.X, T0.X, 40, #3
+; CM-NEXT:     VTX_READ_16 T2.X, T0.X, 42, #3
+; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 44, #3
+; CM-NEXT:    ALU clause starting at 12:
+; CM-NEXT:     MOV * T0.X, 0.0,
+; CM-NEXT:    ALU clause starting at 13:
+; CM-NEXT:     MULLO_INT T0.X (MASKED), T1.X, T2.X,
+; CM-NEXT:     MULLO_INT T0.Y, T1.X, T2.X,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T1.X, T2.X,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T1.X, T2.X,
+; CM-NEXT:     ADD_INT * T0.W, PV.Y, T0.X,
+; CM-NEXT:     BFE_INT * T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i16_mad24:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT:    s_load_dword s4, s[4:5], 0xb
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_lshr_b32 s2, s2, 16
+; GCN-NEXT:    s_mul_i32 s2, s4, s2
+; GCN-NEXT:    s_add_i32 s2, s2, s3
+; GCN-NEXT:    s_sext_i32_i16 s2, s2
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s1
+; GCN-NEXT:    v_mov_b32_e32 v0, s2
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; GFX8-LABEL: i16_mad24:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8-NEXT:    s_load_dword s8, s[4:5], 0x2c
+; GFX8-NEXT:    s_mov_b32 s7, 0xf000
+; GFX8-NEXT:    s_mov_b32 s6, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_mov_b32 s4, s0
+; GFX8-NEXT:    s_lshr_b32 s0, s2, 16
+; GFX8-NEXT:    s_mul_i32 s0, s8, s0
+; GFX8-NEXT:    s_add_i32 s0, s0, s3
+; GFX8-NEXT:    s_sext_i32_i16 s0, s0
+; GFX8-NEXT:    s_mov_b32 s5, s1
+; GFX8-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX8-NEXT:    s_endpgm
 entry:
   %0 = mul i16 %a, %b
   %1 = add i16 %0, %c
@@ -46,17 +172,85 @@ entry:
 }
 
 ; FIXME: Need to handle non-uniform case for function below (load without gep).
-; FUNC-LABEL: {{^}}i8_mad24:
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
 ; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 8
-; GCN:	s_mul_i32 [[MUL:s[0-9]]], {{[s][0-9], [s][0-9]}}
-; GCN:	s_add_i32 [[MAD:s[0-9]]], [[MUL]], s{{[0-9]}}
-; GCN:	s_sext_i32_i8 [[EXT:s[0-9]]], [[MAD]]
-; GCN:	v_mov_b32_e32 v0, [[EXT]]
 define amdgpu_kernel void @i8_mad24(ptr addrspace(1) %out, i8 %a, i8 %b, i8 %c) {
+; EG-LABEL: i8_mad24:
+; EG:       ; %bb.0: ; %entry
+; EG-NEXT:    ALU 0, @12, KC0[], KC1[]
+; EG-NEXT:    TEX 2 @6
+; EG-NEXT:    ALU 4, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_8 T1.X, T0.X, 40, #3
+; EG-NEXT:     VTX_READ_8 T2.X, T0.X, 41, #3
+; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 42, #3
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     MOV * T0.X, 0.0,
+; EG-NEXT:    ALU clause starting at 13:
+; EG-NEXT:     MULLO_INT * T0.Y, T1.X, T2.X,
+; EG-NEXT:     ADD_INT * T0.W, PS, T0.X,
+; EG-NEXT:     BFE_INT T0.X, PV.W, 0.0, literal.x,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    8(1.121039e-44), 2(2.802597e-45)
+;
+; CM-LABEL: i8_mad24:
+; CM:       ; %bb.0: ; %entry
+; CM-NEXT:    ALU 0, @12, KC0[], KC1[]
+; CM-NEXT:    TEX 2 @6
+; CM-NEXT:    ALU 8, @13, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    PAD
+; CM-NEXT:    Fetch clause starting at 6:
+; CM-NEXT:     VTX_READ_8 T1.X, T0.X, 40, #3
+; CM-NEXT:     VTX_READ_8 T2.X, T0.X, 41, #3
+; CM-NEXT:     VTX_READ_8 T0.X, T0.X, 42, #3
+; CM-NEXT:    ALU clause starting at 12:
+; CM-NEXT:     MOV * T0.X, 0.0,
+; CM-NEXT:    ALU clause starting at 13:
+; CM-NEXT:     MULLO_INT T0.X (MASKED), T1.X, T2.X,
+; CM-NEXT:     MULLO_INT T0.Y, T1.X, T2.X,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T1.X, T2.X,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T1.X, T2.X,
+; CM-NEXT:     ADD_INT * T0.W, PV.Y, T0.X,
+; CM-NEXT:     BFE_INT * T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i8_mad24:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dword s2, s[4:5], 0xb
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_lshr_b32 s4, s2, 8
+; GCN-NEXT:    s_lshr_b32 s5, s2, 16
+; GCN-NEXT:    s_mul_i32 s2, s2, s4
+; GCN-NEXT:    s_add_i32 s2, s2, s5
+; GCN-NEXT:    s_sext_i32_i8 s4, s2
+; GCN-NEXT:    s_mov_b32 s2, -1
+; GCN-NEXT:    v_mov_b32_e32 v0, s4
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT:    s_endpgm
+;
+; GFX8-LABEL: i8_mad24:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dword s6, s[4:5], 0x2c
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT:    s_mov_b32 s3, 0xf000
+; GFX8-NEXT:    s_mov_b32 s2, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_lshr_b32 s4, s6, 8
+; GFX8-NEXT:    s_lshr_b32 s5, s6, 16
+; GFX8-NEXT:    s_mul_i32 s4, s6, s4
+; GFX8-NEXT:    s_add_i32 s4, s4, s5
+; GFX8-NEXT:    s_sext_i32_i8 s4, s4
+; GFX8-NEXT:    v_mov_b32_e32 v0, s4
+; GFX8-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT:    s_endpgm
 entry:
   %0 = mul i8 %a, %b
   %1 = add i8 %0, %c
@@ -72,11 +266,75 @@ entry:
 ; 24-bit mad pattern wasn't being matched.
 
 ; Check that the select instruction is not deleted.
-; FUNC-LABEL: {{^}}i24_i32_i32_mad:
-; EG: CNDE_INT
-; SI: s_cselect
-; GCN2: s_cselect
 define amdgpu_kernel void @i24_i32_i32_mad(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %d) {
+; EG-LABEL: i24_i32_i32_mad:
+; EG:       ; %bb.0: ; %entry
+; EG-NEXT:    ALU 7, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    ALU clause starting at 4:
+; EG-NEXT:     ASHR * T0.W, KC0[2].Z, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     CNDE_INT * T0.W, KC0[3].X, literal.x, PV.W,
+; EG-NEXT:    34(4.764415e-44), 0(0.000000e+00)
+; EG-NEXT:     MULLO_INT * T0.X, PV.W, KC0[3].X,
+; EG-NEXT:     ADD_INT T0.X, PS, KC0[3].Y,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: i24_i32_i32_mad:
+; CM:       ; %bb.0: ; %entry
+; CM-NEXT:    ALU 10, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    PAD
+; CM-NEXT:    ALU clause starting at 4:
+; CM-NEXT:     ASHR * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     CNDE_INT * T0.W, KC0[3].X, literal.x, PV.W,
+; CM-NEXT:    34(4.764415e-44), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T0.X, T0.W, KC0[3].X,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.W, KC0[3].X,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T0.W, KC0[3].X,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.W, KC0[3].X,
+; CM-NEXT:     ADD_INT * T0.X, PV.X, KC0[3].Y,
+; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i24_i32_i32_mad:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dword s2, s[4:5], 0xb
+; GCN-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_ashr_i32 s2, s2, 8
+; GCN-NEXT:    s_cmp_lg_u32 s6, 0
+; GCN-NEXT:    s_cselect_b32 s2, s2, 34
+; GCN-NEXT:    s_mul_i32 s2, s2, s6
+; GCN-NEXT:    s_add_i32 s4, s2, s7
+; GCN-NEXT:    s_mov_b32 s2, -1
+; GCN-NEXT:    v_mov_b32_e32 v0, s4
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT:    s_endpgm
+;
+; GFX8-LABEL: i24_i32_i32_mad:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dword s8, s[4:5], 0x2c
+; GFX8-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT:    s_mov_b32 s3, 0xf000
+; GFX8-NEXT:    s_mov_b32 s2, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_ashr_i32 s4, s8, 8
+; GFX8-NEXT:    s_cmp_lg_u32 s6, 0
+; GFX8-NEXT:    s_cselect_b32 s4, s4, 34
+; GFX8-NEXT:    s_mul_i32 s4, s4, s6
+; GFX8-NEXT:    s_add_i32 s4, s4, s7
+; GFX8-NEXT:    v_mov_b32_e32 v0, s4
+; GFX8-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT:    s_endpgm
 entry:
   %0 = ashr i32 %a, 8
   %1 = icmp ne i32 %c, 0
@@ -87,13 +345,139 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: {{^}}extra_and:
-; SI-NOT: v_and
-; SI: s_mul_i32
-; SI: s_mul_i32
-; SI: s_add_i32
-; SI: s_add_i32
 define amdgpu_kernel void @extra_and(ptr addrspace(1) %arg, i32 %arg2, i32 %arg3) {
+; EG-LABEL: extra_and:
+; EG:       ; %bb.0: ; %bb
+; EG-NEXT:    ALU 5, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    LOOP_START_DX10 @7
+; EG-NEXT:    ALU_PUSH_BEFORE 12, @16, KC0[], KC1[]
+; EG-NEXT:    JUMP @6 POP:1
+; EG-NEXT:    LOOP_BREAK @6
+; EG-NEXT:    POP @6 POP:1
+; EG-NEXT:    END_LOOP @2
+; EG-NEXT:    ALU 1, @29, KC0[], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     MOV * T1.W, literal.x,
+; EG-NEXT:    0(0.000000e+00), 0(0.000000e+00)
+; EG-NEXT:     MOV * T3.W, PV.W,
+; EG-NEXT:     MOV T0.Z, KC0[2].Y,
+; EG-NEXT:     MOV T0.W, KC0[2].Z,
+; EG-NEXT:     MOV * T2.W, KC0[2].W,
+; EG-NEXT:    ALU clause starting at 16:
+; EG-NEXT:     AND_INT T1.W, T1.W, literal.x,
+; EG-NEXT:     AND_INT * T4.W, T3.W, literal.x,
+; EG-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T3.W, T3.W, literal.x,
+; EG-NEXT:     MULLO_INT * T0.X, PS, PV.W,
+; EG-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT:     MULLO_INT * T0.Y, PV.W, T1.W,
+; EG-NEXT:     ADD_INT T3.W, T2.W, PS,
+; EG-NEXT:     ADD_INT * T1.W, T0.W, T0.X,
+; EG-NEXT:     ADD_INT * T0.X, PS, PV.W,
+; EG-NEXT:     SETNE_INT * T4.W, PV.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; EG-NEXT:    ALU clause starting at 29:
+; EG-NEXT:     LSHR * T1.X, T0.Z, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: extra_and:
+; CM:       ; %bb.0: ; %bb
+; CM-NEXT:    ALU 5, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    LOOP_START_DX10 @7
+; CM-NEXT:    ALU_PUSH_BEFORE 17, @16, KC0[], KC1[]
+; CM-NEXT:    JUMP @6 POP:1
+; CM-NEXT:    LOOP_BREAK @6
+; CM-NEXT:    POP @6 POP:1
+; CM-NEXT:    END_LOOP @2
+; CM-NEXT:    ALU 1, @34, KC0[], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    ALU clause starting at 10:
+; CM-NEXT:     MOV * T0.W, literal.x,
+; CM-NEXT:    0(0.000000e+00), 0(0.000000e+00)
+; CM-NEXT:     MOV * T1.Z, PV.W,
+; CM-NEXT:     MOV T0.Y, KC0[2].Y,
+; CM-NEXT:     MOV T0.Z, KC0[2].Z,
+; CM-NEXT:     MOV * T1.W, KC0[2].W,
+; CM-NEXT:    ALU clause starting at 16:
+; CM-NEXT:     AND_INT T1.Y, T1.Z, literal.x,
+; CM-NEXT:     AND_INT T2.Z, T0.W, literal.x,
+; CM-NEXT:     AND_INT * T0.W, T1.Z, literal.x,
+; CM-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T0.X, T0.W, T2.Z,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.W, T2.Z,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T0.W, T2.Z,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.W, T2.Z,
+; CM-NEXT:     MULLO_INT T0.X (MASKED), T1.Y, T2.Z,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T1.Y, T2.Z,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T1.Y, T2.Z,
+; CM-NEXT:     MULLO_INT * T0.W, T1.Y, T2.Z,
+; CM-NEXT:     ADD_INT T1.Z, T1.W, PV.W,
+; CM-NEXT:     ADD_INT * T0.W, T0.Z, T0.X,
+; CM-NEXT:     ADD_INT * T0.X, PV.W, PV.Z,
+; CM-NEXT:     SETNE_INT * T2.W, PV.X, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; CM-NEXT:    ALU clause starting at 34:
+; CM-NEXT:     LSHR * T1.X, T0.Y, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: extra_and:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xb
+; GCN-NEXT:    s_mov_b32 s2, 0
+; GCN-NEXT:    s_mov_b32 s6, 0
+; GCN-NEXT:  .LBB4_1: ; %bb4
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    s_and_b32 s3, s6, 0xffffff
+; GCN-NEXT:    s_and_b32 s6, s6, 0xffffff
+; GCN-NEXT:    s_and_b32 s2, s2, 0xffffff
+; GCN-NEXT:    s_mul_i32 s3, s3, s2
+; GCN-NEXT:    s_mul_i32 s6, s6, s2
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_add_i32 s2, s0, s3
+; GCN-NEXT:    s_add_i32 s6, s1, s6
+; GCN-NEXT:    s_add_i32 s3, s2, s6
+; GCN-NEXT:    s_cmp_lg_u32 s3, 8
+; GCN-NEXT:    s_cbranch_scc1 .LBB4_1
+; GCN-NEXT:  ; %bb.2: ; %bb18
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    v_mov_b32_e32 v0, s3
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; GFX8-LABEL: extra_and:
+; GFX8:       ; %bb.0: ; %bb
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; GFX8-NEXT:    s_mov_b32 s2, 0
+; GFX8-NEXT:    s_mov_b32 s6, 0
+; GFX8-NEXT:  .LBB4_1: ; %bb4
+; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX8-NEXT:    s_and_b32 s3, s6, 0xffffff
+; GFX8-NEXT:    s_and_b32 s6, s6, 0xffffff
+; GFX8-NEXT:    s_and_b32 s2, s2, 0xffffff
+; GFX8-NEXT:    s_mul_i32 s3, s3, s2
+; GFX8-NEXT:    s_mul_i32 s6, s6, s2
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_add_i32 s2, s0, s3
+; GFX8-NEXT:    s_add_i32 s6, s1, s6
+; GFX8-NEXT:    s_add_i32 s3, s2, s6
+; GFX8-NEXT:    s_cmp_lg_u32 s3, 8
+; GFX8-NEXT:    s_cbranch_scc1 .LBB4_1
+; GFX8-NEXT:  ; %bb.2: ; %bb18
+; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
+; GFX8-NEXT:    s_mov_b32 s7, 0xf000
+; GFX8-NEXT:    s_mov_b32 s6, -1
+; GFX8-NEXT:    v_mov_b32_e32 v0, s3
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX8-NEXT:    s_endpgm
 bb:
   br label %bb4
 
@@ -119,13 +503,139 @@ bb18:                                             ; preds = %bb4
   ret void
 }
 
-; FUNC-LABEL: {{^}}dont_remove_shift
-; SI: s_lshr
-; SI: s_mul_i32
-; SI: s_mul_i32
-; SI: s_add_i32
-; SI: s_add_i32
 define amdgpu_kernel void @dont_remove_shift(ptr addrspace(1) %arg, i32 %arg2, i32 %arg3) {
+; EG-LABEL: dont_remove_shift:
+; EG:       ; %bb.0: ; %bb
+; EG-NEXT:    ALU 5, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    LOOP_START_DX10 @7
+; EG-NEXT:    ALU_PUSH_BEFORE 12, @16, KC0[], KC1[]
+; EG-NEXT:    JUMP @6 POP:1
+; EG-NEXT:    LOOP_BREAK @6
+; EG-NEXT:    POP @6 POP:1
+; EG-NEXT:    END_LOOP @2
+; EG-NEXT:    ALU 1, @29, KC0[], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     MOV * T1.W, literal.x,
+; EG-NEXT:    0(0.000000e+00), 0(0.000000e+00)
+; EG-NEXT:     MOV * T3.W, PV.W,
+; EG-NEXT:     MOV T0.Z, KC0[2].Y,
+; EG-NEXT:     MOV T0.W, KC0[2].Z,
+; EG-NEXT:     MOV * T2.W, KC0[2].W,
+; EG-NEXT:    ALU clause starting at 16:
+; EG-NEXT:     LSHR T1.W, T1.W, literal.x,
+; EG-NEXT:     LSHR * T4.W, T3.W, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     LSHR T3.W, T3.W, literal.x,
+; EG-NEXT:     MULLO_INT * T0.X, PS, PV.W,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     MULLO_INT * T0.Y, PV.W, T1.W,
+; EG-NEXT:     ADD_INT T3.W, T2.W, PS,
+; EG-NEXT:     ADD_INT * T1.W, T0.W, T0.X,
+; EG-NEXT:     ADD_INT * T0.X, PS, PV.W,
+; EG-NEXT:     SETNE_INT * T4.W, PV.X, literal.x,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; EG-NEXT:    ALU clause starting at 29:
+; EG-NEXT:     LSHR * T1.X, T0.Z, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: dont_remove_shift:
+; CM:       ; %bb.0: ; %bb
+; CM-NEXT:    ALU 5, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    LOOP_START_DX10 @7
+; CM-NEXT:    ALU_PUSH_BEFORE 17, @16, KC0[], KC1[]
+; CM-NEXT:    JUMP @6 POP:1
+; CM-NEXT:    LOOP_BREAK @6
+; CM-NEXT:    POP @6 POP:1
+; CM-NEXT:    END_LOOP @2
+; CM-NEXT:    ALU 1, @34, KC0[], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    ALU clause starting at 10:
+; CM-NEXT:     MOV * T0.W, literal.x,
+; CM-NEXT:    0(0.000000e+00), 0(0.000000e+00)
+; CM-NEXT:     MOV * T1.Z, PV.W,
+; CM-NEXT:     MOV T0.Y, KC0[2].Y,
+; CM-NEXT:     MOV T0.Z, KC0[2].Z,
+; CM-NEXT:     MOV * T1.W, KC0[2].W,
+; CM-NEXT:    ALU clause starting at 16:
+; CM-NEXT:     LSHR T1.Y, T1.Z, literal.x,
+; CM-NEXT:     LSHR T2.Z, T0.W, literal.x,
+; CM-NEXT:     LSHR * T0.W, T1.Z, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T0.X, T0.W, T2.Z,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.W, T2.Z,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T0.W, T2.Z,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.W, T2.Z,
+; CM-NEXT:     MULLO_INT T0.X (MASKED), T1.Y, T2.Z,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T1.Y, T2.Z,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T1.Y, T2.Z,
+; CM-NEXT:     MULLO_INT * T0.W, T1.Y, T2.Z,
+; CM-NEXT:     ADD_INT T1.Z, T1.W, PV.W,
+; CM-NEXT:     ADD_INT * T0.W, T0.Z, T0.X,
+; CM-NEXT:     ADD_INT * T0.X, PV.W, PV.Z,
+; CM-NEXT:     SETNE_INT * T2.W, PV.X, literal.x,
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; CM-NEXT:    ALU clause starting at 34:
+; CM-NEXT:     LSHR * T1.X, T0.Y, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: dont_remove_shift:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xb
+; GCN-NEXT:    s_mov_b32 s2, 0
+; GCN-NEXT:    s_mov_b32 s6, 0
+; GCN-NEXT:  .LBB5_1: ; %bb4
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    s_lshr_b32 s3, s6, 8
+; GCN-NEXT:    s_lshr_b32 s6, s6, 8
+; GCN-NEXT:    s_lshr_b32 s2, s2, 8
+; GCN-NEXT:    s_mul_i32 s3, s3, s2
+; GCN-NEXT:    s_mul_i32 s6, s6, s2
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_add_i32 s2, s0, s3
+; GCN-NEXT:    s_add_i32 s6, s1, s6
+; GCN-NEXT:    s_add_i32 s3, s2, s6
+; GCN-NEXT:    s_cmp_lg_u32 s3, 8
+; GCN-NEXT:    s_cbranch_scc1 .LBB5_1
+; GCN-NEXT:  ; %bb.2: ; %bb18
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    v_mov_b32_e32 v0, s3
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+;
+; GFX8-LABEL: dont_remove_shift:
+; GFX8:       ; %bb.0: ; %bb
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; GFX8-NEXT:    s_mov_b32 s2, 0
+; GFX8-NEXT:    s_mov_b32 s6, 0
+; GFX8-NEXT:  .LBB5_1: ; %bb4
+; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX8-NEXT:    s_lshr_b32 s3, s6, 8
+; GFX8-NEXT:    s_lshr_b32 s6, s6, 8
+; GFX8-NEXT:    s_lshr_b32 s2, s2, 8
+; GFX8-NEXT:    s_mul_i32 s3, s3, s2
+; GFX8-NEXT:    s_mul_i32 s6, s6, s2
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_add_i32 s2, s0, s3
+; GFX8-NEXT:    s_add_i32 s6, s1, s6
+; GFX8-NEXT:    s_add_i32 s3, s2, s6
+; GFX8-NEXT:    s_cmp_lg_u32 s3, 8
+; GFX8-NEXT:    s_cbranch_scc1 .LBB5_1
+; GFX8-NEXT:  ; %bb.2: ; %bb18
+; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x24
+; GFX8-NEXT:    s_mov_b32 s7, 0xf000
+; GFX8-NEXT:    s_mov_b32 s6, -1
+; GFX8-NEXT:    v_mov_b32_e32 v0, s3
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX8-NEXT:    s_endpgm
 bb:
   br label %bb4
 
@@ -151,19 +661,234 @@ bb18:                                             ; preds = %bb4
   ret void
 }
 
-; FUNC-LABEL: {{^}}i8_mad_sat_16:
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 8
-; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
-; SI: v_med3_i32 v{{[0-9]}}, [[EXT]],
-; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; VI: v_max_i16_e32 [[MAX:v[0-9]]], 0xff80, [[MAD]]
-; VI: v_min_i16_e32 {{v[0-9]}}, 0x7f, [[MAX]]
 define amdgpu_kernel void @i8_mad_sat_16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(5) %idx) {
+; EG-LABEL: i8_mad_sat_16:
+; EG:       ; %bb.0: ; %entry
+; EG-NEXT:    ALU 4, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @8
+; EG-NEXT:    ALU 1, @19, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 1 @10
+; EG-NEXT:    ALU 24, @21, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 8:
+; EG-NEXT:     VTX_READ_8 T1.X, T1.X, 0, #1
+; EG-NEXT:    Fetch clause starting at 10:
+; EG-NEXT:     VTX_READ_8 T3.X, T3.X, 0, #1
+; EG-NEXT:     VTX_READ_8 T2.X, T2.X, 0, #1
+; EG-NEXT:    ALU clause starting at 14:
+; EG-NEXT:     LSHR * T0.W, KC0[3].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOVA_INT * AR.x (MASKED), PV.W,
+; EG-NEXT:     MOV * T0.X, T(0 + AR.x).X+,
+; EG-NEXT:     ADD_INT * T1.X, KC0[2].W, PV.X,
+; EG-NEXT:    ALU clause starting at 19:
+; EG-NEXT:     ADD_INT T2.X, KC0[2].Z, T0.X,
+; EG-NEXT:     ADD_INT * T3.X, KC0[3].X, T0.X,
+; EG-NEXT:    ALU clause starting at 21:
+; EG-NEXT:     BFE_INT T0.Z, T1.X, 0.0, literal.x,
+; EG-NEXT:     BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T1.W, T3.X, 0.0, literal.x,
+; EG-NEXT:     MULLO_INT * T0.Y, PV.Z, PV.W,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     MAX_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     ADD_INT * T1.W, KC0[2].Y, T0.X,
+; EG-NEXT:    -128(nan), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T2.W, PS, literal.x,
+; EG-NEXT:     MIN_INT * T0.W, PV.W, literal.y,
+; EG-NEXT:    3(4.203895e-45), 127(1.779649e-43)
+; EG-NEXT:     AND_INT T0.W, PS, literal.x,
+; EG-NEXT:     LSHL * T2.W, PV.W, literal.y,
+; EG-NEXT:    255(3.573311e-43), 3(4.203895e-45)
+; EG-NEXT:     LSHL T0.X, PV.W, PS,
+; EG-NEXT:     LSHL * T0.W, literal.x, PS,
+; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
+; EG-NEXT:     MOV T0.Y, 0.0,
+; EG-NEXT:     MOV * T0.Z, 0.0,
+; EG-NEXT:     LSHR * T1.X, T1.W, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: i8_mad_sat_16:
+; CM:       ; %bb.0: ; %entry
+; CM-NEXT:    ALU 4, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    TEX 0 @8
+; CM-NEXT:    ALU 1, @19, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    TEX 1 @10
+; CM-NEXT:    ALU 26, @21, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT MSKOR T1.XW, T0.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    PAD
+; CM-NEXT:    Fetch clause starting at 8:
+; CM-NEXT:     VTX_READ_8 T1.X, T1.X, 0, #1
+; CM-NEXT:    Fetch clause starting at 10:
+; CM-NEXT:     VTX_READ_8 T3.X, T3.X, 0, #1
+; CM-NEXT:     VTX_READ_8 T2.X, T2.X, 0, #1
+; CM-NEXT:    ALU clause starting at 14:
+; CM-NEXT:     LSHR * T0.W, KC0[3].Y, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:     MOVA_INT * AR.x (MASKED), PV.W,
+; CM-NEXT:     MOV * T0.X, T(0 + AR.x).X+,
+; CM-NEXT:     ADD_INT * T1.X, KC0[3].X, PV.X,
+; CM-NEXT:    ALU clause starting at 19:
+; CM-NEXT:     ADD_INT * T2.X, KC0[2].W, T0.X,
+; CM-NEXT:     ADD_INT * T3.X, KC0[2].Z, T0.X,
+; CM-NEXT:    ALU clause starting at 21:
+; CM-NEXT:     BFE_INT T0.Y, T1.X, 0.0, literal.x,
+; CM-NEXT:     BFE_INT T0.Z, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT:     BFE_INT * T0.W, T3.X, 0.0, literal.x, BS:VEC_201
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T0.X (MASKED), T0.Z, T0.W,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.Z, T0.W,
+; CM-NEXT:     MULLO_INT T0.Z, T0.Z, T0.W,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.Z, T0.W,
+; CM-NEXT:     ADD_INT * T0.W, PV.Z, T0.Y,
+; CM-NEXT:     BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     MAX_INT T0.Z, PV.W, literal.x,
+; CM-NEXT:     ADD_INT * T0.W, KC0[2].Y, T0.X,
+; CM-NEXT:    -128(nan), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T1.Z, PV.W, literal.x,
+; CM-NEXT:     MIN_INT * T1.W, PV.Z, literal.y,
+; CM-NEXT:    3(4.203895e-45), 127(1.779649e-43)
+; CM-NEXT:     AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT:     LSHL * T1.W, PV.Z, literal.y,
+; CM-NEXT:    255(3.573311e-43), 3(4.203895e-45)
+; CM-NEXT:     LSHL T1.X, PV.Z, PV.W,
+; CM-NEXT:     LSHL * T1.W, literal.x, PV.W,
+; CM-NEXT:    255(3.573311e-43), 0(0.000000e+00)
+; CM-NEXT:     MOV T1.Y, 0.0,
+; CM-NEXT:     MOV * T1.Z, 0.0,
+; CM-NEXT:     LSHR * T0.X, T0.W, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i8_mad_sat_16:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GCN-NEXT:    s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GCN-NEXT:    s_mov_b32 s22, -1
+; GCN-NEXT:    s_mov_b32 s23, 0xe8f000
+; GCN-NEXT:    s_add_u32 s20, s20, s11
+; GCN-NEXT:    s_addc_u32 s21, s21, 0
+; GCN-NEXT:    s_load_dword s8, s[4:5], 0x11
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_add_i32 s9, s8, 4
+; GCN-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
+; GCN-NEXT:    v_mov_b32_e32 v0, s8
+; GCN-NEXT:    v_mov_b32_e32 v1, s9
+; GCN-NEXT:    buffer_load_dword v1, v1, s[20:23], 0 offen
+; GCN-NEXT:    buffer_load_dword v0, v0, s[20:23], 0 offen
+; GCN-NEXT:    s_mov_b32 s11, 0xf000
+; GCN-NEXT:    s_mov_b32 s10, 0
+; GCN-NEXT:    s_mov_b64 s[14:15], s[10:11]
+; GCN-NEXT:    s_mov_b64 s[18:19], s[10:11]
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; GCN-NEXT:    s_mov_b64 s[12:13], s[4:5]
+; GCN-NEXT:    s_mov_b64 s[16:17], s[6:7]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    buffer_load_sbyte v2, v[0:1], s[12:15], 0 addr64
+; GCN-NEXT:    buffer_load_sbyte v3, v[0:1], s[8:11], 0 addr64
+; GCN-NEXT:    buffer_load_sbyte v4, v[0:1], s[16:19], 0 addr64
+; GCN-NEXT:    s_movk_i32 s2, 0xff80
+; GCN-NEXT:    s_waitcnt vmcnt(2)
+; GCN-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GCN-NEXT:    s_waitcnt vmcnt(1)
+; GCN-NEXT:    v_and_b32_e32 v3, 0xffff, v3
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_mad_u32_u24 v2, v2, v3, v4
+; GCN-NEXT:    v_bfe_i32 v2, v2, 0, 16
+; GCN-NEXT:    v_mov_b32_e32 v3, 0x7f
+; GCN-NEXT:    v_med3_i32 v2, v2, s2, v3
+; GCN-NEXT:    s_mov_b64 s[2:3], s[10:11]
+; GCN-NEXT:    buffer_store_byte v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT:    s_endpgm
+;
+; SI-LABEL: i8_mad_sat_16:
+; SI:       ; %bb.0: ; %entry
+; SI-NEXT:    s_mov_b32 s88, SCRATCH_RSRC_DWORD0
+; SI-NEXT:    s_load_dword s0, s[4:5], 0x44
+; SI-NEXT:    s_mov_b32 s89, SCRATCH_RSRC_DWORD1
+; SI-NEXT:    s_mov_b32 s90, -1
+; SI-NEXT:    s_mov_b32 s91, 0xe80000
+; SI-NEXT:    s_add_u32 s88, s88, s11
+; SI-NEXT:    s_addc_u32 s89, s89, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_add_i32 s1, s0, 4
+; SI-NEXT:    v_mov_b32_e32 v0, s0
+; SI-NEXT:    buffer_load_dword v6, v0, s[88:91], 0 offen
+; SI-NEXT:    v_mov_b32_e32 v0, s1
+; SI-NEXT:    buffer_load_dword v7, v0, s[88:91], 0 offen
+; SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v1, s3
+; SI-NEXT:    v_mov_b32_e32 v3, s5
+; SI-NEXT:    v_mov_b32_e32 v5, s7
+; SI-NEXT:    s_waitcnt vmcnt(1)
+; SI-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
+; SI-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
+; SI-NEXT:    v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; SI-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
+; SI-NEXT:    v_addc_u32_e32 v5, vcc, v5, v7, vcc
+; SI-NEXT:    flat_load_sbyte v0, v[0:1]
+; SI-NEXT:    flat_load_sbyte v1, v[2:3]
+; SI-NEXT:    flat_load_sbyte v2, v[4:5]
+; SI-NEXT:    v_mov_b32_e32 v3, s1
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_mad_u16 v0, v1, v0, v2
+; SI-NEXT:    v_max_i16_e32 v0, 0xff80, v0
+; SI-NEXT:    v_min_i16_e32 v2, 0x7f, v0
+; SI-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
+; SI-NEXT:    v_addc_u32_e32 v1, vcc, v3, v7, vcc
+; SI-NEXT:    flat_store_byte v[0:1], v2
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: i8_mad_sat_16:
+; VI:       ; %bb.0: ; %entry
+; VI-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; VI-NEXT:    s_load_dword s0, s[4:5], 0x44
+; VI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; VI-NEXT:    s_mov_b32 s14, -1
+; VI-NEXT:    s_mov_b32 s15, 0xe80000
+; VI-NEXT:    s_add_u32 s12, s12, s11
+; VI-NEXT:    s_addc_u32 s13, s13, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_add_i32 s1, s0, 4
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    buffer_load_dword v6, v0, s[12:15], 0 offen
+; VI-NEXT:    v_mov_b32_e32 v0, s1
+; VI-NEXT:    buffer_load_dword v7, v0, s[12:15], 0 offen
+; VI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_mov_b32_e32 v3, s5
+; VI-NEXT:    v_mov_b32_e32 v5, s7
+; VI-NEXT:    s_waitcnt vmcnt(1)
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v7, vcc
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; VI-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
+; VI-NEXT:    v_addc_u32_e32 v5, vcc, v5, v7, vcc
+; VI-NEXT:    flat_load_sbyte v0, v[0:1]
+; VI-NEXT:    flat_load_sbyte v1, v[2:3]
+; VI-NEXT:    flat_load_sbyte v2, v[4:5]
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mad_u16 v0, v1, v0, v2
+; VI-NEXT:    v_max_i16_e32 v0, 0xff80, v0
+; VI-NEXT:    v_min_i16_e32 v2, 0x7f, v0
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, v3, v7, vcc
+; VI-NEXT:    flat_store_byte v[0:1], v2
+; VI-NEXT:    s_endpgm
 entry:
   %retval.0.i = load i64, ptr addrspace(5) %idx
   %arrayidx = getelementptr inbounds i8, ptr addrspace(1) %in0, i64 %retval.0.i
@@ -187,16 +912,201 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: {{^}}i8_mad_32:
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 8
-; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
 define amdgpu_kernel void @i8_mad_32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(5) %idx) {
+; EG-LABEL: i8_mad_32:
+; EG:       ; %bb.0: ; %entry
+; EG-NEXT:    ALU 4, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @8
+; EG-NEXT:    ALU 1, @19, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 1 @10
+; EG-NEXT:    ALU 9, @21, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 8:
+; EG-NEXT:     VTX_READ_8 T1.X, T1.X, 0, #1
+; EG-NEXT:    Fetch clause starting at 10:
+; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT:     VTX_READ_8 T2.X, T2.X, 0, #1
+; EG-NEXT:    ALU clause starting at 14:
+; EG-NEXT:     LSHR * T0.W, KC0[3].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOVA_INT * AR.x (MASKED), PV.W,
+; EG-NEXT:     MOV * T0.X, T(0 + AR.x).X+,
+; EG-NEXT:     ADD_INT * T1.X, KC0[2].W, PV.X,
+; EG-NEXT:    ALU clause starting at 19:
+; EG-NEXT:     ADD_INT T2.X, KC0[2].Z, T0.X,
+; EG-NEXT:     ADD_INT * T0.X, KC0[3].X, T0.X,
+; EG-NEXT:    ALU clause starting at 21:
+; EG-NEXT:     BFE_INT T0.Z, T1.X, 0.0, literal.x,
+; EG-NEXT:     BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T1.W, T0.X, 0.0, literal.x,
+; EG-NEXT:     MULLO_INT * T0.X, PV.W, PV.Z,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT:     BFE_INT T0.X, PV.W, 0.0, literal.x,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
+;
+; CM-LABEL: i8_mad_32:
+; CM:       ; %bb.0: ; %entry
+; CM-NEXT:    ALU 4, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    TEX 0 @8
+; CM-NEXT:    ALU 1, @19, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    TEX 1 @10
+; CM-NEXT:    ALU 12, @21, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    PAD
+; CM-NEXT:    Fetch clause starting at 8:
+; CM-NEXT:     VTX_READ_8 T1.X, T1.X, 0, #1
+; CM-NEXT:    Fetch clause starting at 10:
+; CM-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
+; CM-NEXT:     VTX_READ_8 T2.X, T2.X, 0, #1
+; CM-NEXT:    ALU clause starting at 14:
+; CM-NEXT:     LSHR * T0.W, KC0[3].Y, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:     MOVA_INT * AR.x (MASKED), PV.W,
+; CM-NEXT:     MOV * T0.X, T(0 + AR.x).X+,
+; CM-NEXT:     ADD_INT * T1.X, KC0[3].X, PV.X,
+; CM-NEXT:    ALU clause starting at 19:
+; CM-NEXT:     ADD_INT * T2.X, KC0[2].W, T0.X,
+; CM-NEXT:     ADD_INT * T0.X, KC0[2].Z, T0.X,
+; CM-NEXT:    ALU clause starting at 21:
+; CM-NEXT:     BFE_INT T0.Y, T1.X, 0.0, literal.x,
+; CM-NEXT:     BFE_INT T0.Z, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT:     BFE_INT * T0.W, T0.X, 0.0, literal.x, BS:VEC_201
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T0.X, T0.W, T0.Z,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.W, T0.Z,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T0.W, T0.Z,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.W, T0.Z,
+; CM-NEXT:     ADD_INT * T0.W, PV.X, T0.Y,
+; CM-NEXT:     BFE_INT * T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i8_mad_32:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_mov_b32 s24, SCRATCH_RSRC_DWORD0
+; GCN-NEXT:    s_mov_b32 s25, SCRATCH_RSRC_DWORD1
+; GCN-NEXT:    s_mov_b32 s26, -1
+; GCN-NEXT:    s_mov_b32 s27, 0xe8f000
+; GCN-NEXT:    s_add_u32 s24, s24, s11
+; GCN-NEXT:    s_addc_u32 s25, s25, 0
+; GCN-NEXT:    s_load_dword s8, s[4:5], 0x11
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_add_i32 s9, s8, 4
+; GCN-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
+; GCN-NEXT:    v_mov_b32_e32 v0, s8
+; GCN-NEXT:    v_mov_b32_e32 v1, s9
+; GCN-NEXT:    buffer_load_dword v1, v1, s[24:27], 0 offen
+; GCN-NEXT:    buffer_load_dword v0, v0, s[24:27], 0 offen
+; GCN-NEXT:    s_mov_b32 s11, 0xf000
+; GCN-NEXT:    s_mov_b32 s14, 0
+; GCN-NEXT:    s_mov_b32 s15, s11
+; GCN-NEXT:    s_mov_b64 s[18:19], s[14:15]
+; GCN-NEXT:    s_mov_b64 s[22:23], s[14:15]
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b64 s[12:13], s[2:3]
+; GCN-NEXT:    s_mov_b64 s[16:17], s[4:5]
+; GCN-NEXT:    s_mov_b64 s[20:21], s[6:7]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    buffer_load_sbyte v2, v[0:1], s[12:15], 0 addr64
+; GCN-NEXT:    buffer_load_sbyte v3, v[0:1], s[16:19], 0 addr64
+; GCN-NEXT:    buffer_load_sbyte v0, v[0:1], s[20:23], 0 addr64
+; GCN-NEXT:    s_mov_b32 s10, -1
+; GCN-NEXT:    s_mov_b32 s8, s0
+; GCN-NEXT:    s_mov_b32 s9, s1
+; GCN-NEXT:    s_waitcnt vmcnt(2)
+; GCN-NEXT:    v_and_b32_e32 v1, 0xffff, v2
+; GCN-NEXT:    s_waitcnt vmcnt(1)
+; GCN-NEXT:    v_and_b32_e32 v2, 0xffff, v3
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_mad_u32_u24 v0, v1, v2, v0
+; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GCN-NEXT:    buffer_store_dword v0, off, s[8:11], 0
+; GCN-NEXT:    s_endpgm
+;
+; SI-LABEL: i8_mad_32:
+; SI:       ; %bb.0: ; %entry
+; SI-NEXT:    s_mov_b32 s88, SCRATCH_RSRC_DWORD0
+; SI-NEXT:    s_load_dword s0, s[4:5], 0x44
+; SI-NEXT:    s_mov_b32 s89, SCRATCH_RSRC_DWORD1
+; SI-NEXT:    s_mov_b32 s90, -1
+; SI-NEXT:    s_mov_b32 s91, 0xe80000
+; SI-NEXT:    s_add_u32 s88, s88, s11
+; SI-NEXT:    s_addc_u32 s89, s89, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_add_i32 s1, s0, 4
+; SI-NEXT:    v_mov_b32_e32 v0, s0
+; SI-NEXT:    buffer_load_dword v4, v0, s[88:91], 0 offen
+; SI-NEXT:    v_mov_b32_e32 v0, s1
+; SI-NEXT:    buffer_load_dword v5, v0, s[88:91], 0 offen
+; SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v1, s3
+; SI-NEXT:    v_mov_b32_e32 v3, s5
+; SI-NEXT:    v_mov_b32_e32 v6, s7
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_waitcnt vmcnt(1)
+; SI-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; SI-NEXT:    v_add_u32_e32 v2, vcc, s4, v4
+; SI-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; SI-NEXT:    v_add_u32_e32 v4, vcc, s6, v4
+; SI-NEXT:    v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; SI-NEXT:    flat_load_sbyte v0, v[0:1]
+; SI-NEXT:    flat_load_sbyte v1, v[2:3]
+; SI-NEXT:    flat_load_sbyte v2, v[4:5]
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_mad_u16 v0, v0, v1, v2
+; SI-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: i8_mad_32:
+; VI:       ; %bb.0: ; %entry
+; VI-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; VI-NEXT:    s_load_dword s0, s[4:5], 0x44
+; VI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; VI-NEXT:    s_mov_b32 s14, -1
+; VI-NEXT:    s_mov_b32 s15, 0xe80000
+; VI-NEXT:    s_add_u32 s12, s12, s11
+; VI-NEXT:    s_addc_u32 s13, s13, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_add_i32 s1, s0, 4
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    buffer_load_dword v4, v0, s[12:15], 0 offen
+; VI-NEXT:    v_mov_b32_e32 v0, s1
+; VI-NEXT:    buffer_load_dword v5, v0, s[12:15], 0 offen
+; VI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_mov_b32_e32 v3, s5
+; VI-NEXT:    v_mov_b32_e32 v6, s7
+; VI-NEXT:    s_mov_b32 s3, 0xf000
+; VI-NEXT:    s_waitcnt vmcnt(1)
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s4, v4
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; VI-NEXT:    v_add_u32_e32 v4, vcc, s6, v4
+; VI-NEXT:    v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; VI-NEXT:    flat_load_sbyte v0, v[0:1]
+; VI-NEXT:    flat_load_sbyte v1, v[2:3]
+; VI-NEXT:    flat_load_sbyte v2, v[4:5]
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mad_u16 v0, v0, v1, v2
+; VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT:    s_endpgm
 entry:
   %retval.0.i = load i64, ptr addrspace(5) %idx
   %arrayidx = getelementptr inbounds i8, ptr addrspace(1) %a, i64 %retval.0.i
@@ -215,16 +1125,207 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: {{^}}i8_mad_64:
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 8
-; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
 define amdgpu_kernel void @i8_mad_64(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(5) %idx) {
+; EG-LABEL: i8_mad_64:
+; EG:       ; %bb.0: ; %entry
+; EG-NEXT:    ALU 4, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @8
+; EG-NEXT:    ALU 1, @19, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 1 @10
+; EG-NEXT:    ALU 11, @21, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 8:
+; EG-NEXT:     VTX_READ_8 T1.X, T1.X, 0, #1
+; EG-NEXT:    Fetch clause starting at 10:
+; EG-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT:     VTX_READ_8 T2.X, T2.X, 0, #1
+; EG-NEXT:    ALU clause starting at 14:
+; EG-NEXT:     LSHR * T0.W, KC0[3].Y, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOVA_INT * AR.x (MASKED), PV.W,
+; EG-NEXT:     MOV * T0.X, T(0 + AR.x).X+,
+; EG-NEXT:     ADD_INT * T1.X, KC0[2].W, PV.X,
+; EG-NEXT:    ALU clause starting at 19:
+; EG-NEXT:     ADD_INT T2.X, KC0[2].Z, T0.X,
+; EG-NEXT:     ADD_INT * T0.X, KC0[3].X, T0.X,
+; EG-NEXT:    ALU clause starting at 21:
+; EG-NEXT:     BFE_INT T0.Z, T1.X, 0.0, literal.x,
+; EG-NEXT:     BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     BFE_INT T1.W, T0.X, 0.0, literal.x,
+; EG-NEXT:     MULLO_INT * T0.X, PV.W, PV.Z,
+; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT:     BFE_INT T0.X, PV.W, 0.0, literal.x,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT:    16(2.242078e-44), 2(2.802597e-45)
+; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
+; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
+;
+; CM-LABEL: i8_mad_64:
+; CM:       ; %bb.0: ; %entry
+; CM-NEXT:    ALU 4, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    TEX 0 @8
+; CM-NEXT:    ALU 1, @19, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    TEX 1 @10
+; CM-NEXT:    ALU 13, @21, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    PAD
+; CM-NEXT:    Fetch clause starting at 8:
+; CM-NEXT:     VTX_READ_8 T1.X, T1.X, 0, #1
+; CM-NEXT:    Fetch clause starting at 10:
+; CM-NEXT:     VTX_READ_8 T0.X, T0.X, 0, #1
+; CM-NEXT:     VTX_READ_8 T2.X, T2.X, 0, #1
+; CM-NEXT:    ALU clause starting at 14:
+; CM-NEXT:     LSHR * T0.W, KC0[3].Y, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:     MOVA_INT * AR.x (MASKED), PV.W,
+; CM-NEXT:     MOV * T0.X, T(0 + AR.x).X+,
+; CM-NEXT:     ADD_INT * T1.X, KC0[3].X, PV.X,
+; CM-NEXT:    ALU clause starting at 19:
+; CM-NEXT:     ADD_INT * T2.X, KC0[2].W, T0.X,
+; CM-NEXT:     ADD_INT * T0.X, KC0[2].Z, T0.X,
+; CM-NEXT:    ALU clause starting at 21:
+; CM-NEXT:     BFE_INT T0.Y, T1.X, 0.0, literal.x,
+; CM-NEXT:     BFE_INT T0.Z, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT:     BFE_INT * T0.W, T0.X, 0.0, literal.x, BS:VEC_201
+; CM-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T0.X, T0.W, T0.Z,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.W, T0.Z,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T0.W, T0.Z,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.W, T0.Z,
+; CM-NEXT:     ADD_INT * T0.W, PV.X, T0.Y,
+; CM-NEXT:     BFE_INT * T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     LSHR T1.X, KC0[2].Y, literal.x,
+; CM-NEXT:     ASHR * T0.Y, PV.X, literal.y,
+; CM-NEXT:    2(2.802597e-45), 31(4.344025e-44)
+;
+; GCN-LABEL: i8_mad_64:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_mov_b32 s24, SCRATCH_RSRC_DWORD0
+; GCN-NEXT:    s_mov_b32 s25, SCRATCH_RSRC_DWORD1
+; GCN-NEXT:    s_mov_b32 s26, -1
+; GCN-NEXT:    s_mov_b32 s27, 0xe8f000
+; GCN-NEXT:    s_add_u32 s24, s24, s11
+; GCN-NEXT:    s_addc_u32 s25, s25, 0
+; GCN-NEXT:    s_load_dword s8, s[4:5], 0x11
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_add_i32 s9, s8, 4
+; GCN-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
+; GCN-NEXT:    v_mov_b32_e32 v0, s8
+; GCN-NEXT:    v_mov_b32_e32 v1, s9
+; GCN-NEXT:    buffer_load_dword v1, v1, s[24:27], 0 offen
+; GCN-NEXT:    buffer_load_dword v0, v0, s[24:27], 0 offen
+; GCN-NEXT:    s_mov_b32 s11, 0xf000
+; GCN-NEXT:    s_mov_b32 s14, 0
+; GCN-NEXT:    s_mov_b32 s15, s11
+; GCN-NEXT:    s_mov_b64 s[18:19], s[14:15]
+; GCN-NEXT:    s_mov_b64 s[22:23], s[14:15]
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_mov_b64 s[12:13], s[2:3]
+; GCN-NEXT:    s_mov_b64 s[16:17], s[4:5]
+; GCN-NEXT:    s_mov_b64 s[20:21], s[6:7]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    buffer_load_sbyte v2, v[0:1], s[12:15], 0 addr64
+; GCN-NEXT:    buffer_load_sbyte v3, v[0:1], s[16:19], 0 addr64
+; GCN-NEXT:    buffer_load_sbyte v0, v[0:1], s[20:23], 0 addr64
+; GCN-NEXT:    s_mov_b32 s10, -1
+; GCN-NEXT:    s_mov_b32 s8, s0
+; GCN-NEXT:    s_mov_b32 s9, s1
+; GCN-NEXT:    s_waitcnt vmcnt(2)
+; GCN-NEXT:    v_and_b32_e32 v1, 0xffff, v2
+; GCN-NEXT:    s_waitcnt vmcnt(1)
+; GCN-NEXT:    v_and_b32_e32 v2, 0xffff, v3
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_mad_u32_u24 v0, v1, v2, v0
+; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[8:11], 0
+; GCN-NEXT:    s_endpgm
+;
+; SI-LABEL: i8_mad_64:
+; SI:       ; %bb.0: ; %entry
+; SI-NEXT:    s_mov_b32 s88, SCRATCH_RSRC_DWORD0
+; SI-NEXT:    s_load_dword s0, s[4:5], 0x44
+; SI-NEXT:    s_mov_b32 s89, SCRATCH_RSRC_DWORD1
+; SI-NEXT:    s_mov_b32 s90, -1
+; SI-NEXT:    s_mov_b32 s91, 0xe80000
+; SI-NEXT:    s_add_u32 s88, s88, s11
+; SI-NEXT:    s_addc_u32 s89, s89, 0
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_add_i32 s1, s0, 4
+; SI-NEXT:    v_mov_b32_e32 v0, s0
+; SI-NEXT:    buffer_load_dword v4, v0, s[88:91], 0 offen
+; SI-NEXT:    v_mov_b32_e32 v0, s1
+; SI-NEXT:    buffer_load_dword v5, v0, s[88:91], 0 offen
+; SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v1, s3
+; SI-NEXT:    v_mov_b32_e32 v3, s5
+; SI-NEXT:    v_mov_b32_e32 v6, s7
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_waitcnt vmcnt(1)
+; SI-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; SI-NEXT:    v_add_u32_e32 v2, vcc, s4, v4
+; SI-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; SI-NEXT:    v_add_u32_e32 v4, vcc, s6, v4
+; SI-NEXT:    v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; SI-NEXT:    flat_load_sbyte v0, v[0:1]
+; SI-NEXT:    flat_load_sbyte v1, v[2:3]
+; SI-NEXT:    flat_load_sbyte v2, v[4:5]
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_mad_u16 v0, v0, v1, v2
+; SI-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; SI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: i8_mad_64:
+; VI:       ; %bb.0: ; %entry
+; VI-NEXT:    s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; VI-NEXT:    s_load_dword s0, s[4:5], 0x44
+; VI-NEXT:    s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; VI-NEXT:    s_mov_b32 s14, -1
+; VI-NEXT:    s_mov_b32 s15, 0xe80000
+; VI-NEXT:    s_add_u32 s12, s12, s11
+; VI-NEXT:    s_addc_u32 s13, s13, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    s_add_i32 s1, s0, 4
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    buffer_load_dword v4, v0, s[12:15], 0 offen
+; VI-NEXT:    v_mov_b32_e32 v0, s1
+; VI-NEXT:    buffer_load_dword v5, v0, s[12:15], 0 offen
+; VI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_mov_b32_e32 v3, s5
+; VI-NEXT:    v_mov_b32_e32 v6, s7
+; VI-NEXT:    s_mov_b32 s3, 0xf000
+; VI-NEXT:    s_waitcnt vmcnt(1)
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s4, v4
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; VI-NEXT:    v_add_u32_e32 v4, vcc, s6, v4
+; VI-NEXT:    v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; VI-NEXT:    flat_load_sbyte v0, v[0:1]
+; VI-NEXT:    flat_load_sbyte v1, v[2:3]
+; VI-NEXT:    flat_load_sbyte v2, v[4:5]
+; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_mad_u16 v0, v0, v1, v2
+; VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; VI-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; VI-NEXT:    s_endpgm
 entry:
   %retval.0.i = load i64, ptr addrspace(5) %idx
   %arrayidx = getelementptr inbounds i8, ptr addrspace(1) %a, i64 %retval.0.i
@@ -248,17 +1349,236 @@ entry:
 ; had a chance to form mul24. The mul combine would then see
 ; extractelement with no known bits and fail. All of the mul/add
 ; combos in this loop should form v_mad_u32_u24.
-
-; FUNC-LABEL: {{^}}mad24_known_bits_destroyed:
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
 define void @mad24_known_bits_destroyed(i32 %arg, <4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3, i32 %arg4, i32 %arg5, i32 %arg6, ptr addrspace(1) %arg7, ptr addrspace(1) %arg8) #0 {
+; EG-LABEL: mad24_known_bits_destroyed:
+; EG:       ; %bb.0: ; %bb
+; EG-NEXT:    ALU 21, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    LOOP_START_DX10 @11
+; EG-NEXT:    ALU 8, @34, KC0[], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T2.X, 0
+; EG-NEXT:    ALU 14, @43, KC0[], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 0
+; EG-NEXT:    ALU_PUSH_BEFORE 3, @58, KC0[], KC1[]
+; EG-NEXT:    JUMP @10 POP:1
+; EG-NEXT:    LOOP_BREAK @10
+; EG-NEXT:    POP @10 POP:1
+; EG-NEXT:    END_LOOP @2
+; EG-NEXT:    CF_END
+; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:     MOV * T0.W, KC0[5].X,
+; EG-NEXT:     MOV * T0.Z, KC0[4].W,
+; EG-NEXT:     MOV * T0.Y, KC0[4].Z,
+; EG-NEXT:     MOV T0.X, KC0[2].Y,
+; EG-NEXT:     AND_INT * T1.Y, KC0[4].X, literal.x,
+; EG-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T1.Z, KC0[3].W, literal.x,
+; EG-NEXT:     AND_INT T1.W, KC0[3].Z, literal.x,
+; EG-NEXT:     MOV * T2.W, KC0[7].Y,
+; EG-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT:     LSHR T1.X, PS, literal.x,
+; EG-NEXT:     AND_INT T2.Y, KC0[6].Y, literal.y,
+; EG-NEXT:     MOV T2.Z, KC0[6].X,
+; EG-NEXT:     MOV * T2.W, KC0[5].W,
+; EG-NEXT:    2(2.802597e-45), 16777215(2.350989e-38)
+; EG-NEXT:     MOV * T3.W, KC0[7].X,
+; EG-NEXT:     LSHR T2.X, PV.W, literal.x,
+; EG-NEXT:     MOV T3.Y, KC0[5].Z,
+; EG-NEXT:     MOV T3.Z, KC0[6].Z,
+; EG-NEXT:     MOV * T3.W, KC0[6].W,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT:     MOV * T4.W, KC0[4].Y,
+; EG-NEXT:    ALU clause starting at 34:
+; EG-NEXT:     MULLO_INT * T0.X, T0.X, T2.Y,
+; EG-NEXT:     ADD_INT * T4.W, PS, T3.Z,
+; EG-NEXT:     AND_INT * T4.W, PV.W, literal.x,
+; EG-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT:     MULLO_INT * T0.X, PV.W, T2.Y,
+; EG-NEXT:     MULLO_INT * T0.W, T0.W, T1.Y,
+; EG-NEXT:     MULLO_INT * T0.Z, T0.Z, T1.Z,
+; EG-NEXT:     MULLO_INT * T0.Y, T0.Y, T1.W,
+; EG-NEXT:     ADD_INT * T0.X, T0.X, T3.Z,
+; EG-NEXT:    ALU clause starting at 43:
+; EG-NEXT:     ADD_INT * T4.W, T0.Y, T3.Y,
+; EG-NEXT:     AND_INT T4.W, PV.W, literal.x,
+; EG-NEXT:     ADD_INT * T5.W, T0.Z, T2.W,
+; EG-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT:     AND_INT T0.Z, PS, literal.x,
+; EG-NEXT:     ADD_INT T0.W, T0.W, T2.Z,
+; EG-NEXT:     MULLO_INT * T0.Y, PV.W, T1.W,
+; EG-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT T0.Y, PS, T3.Y,
+; EG-NEXT:     AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT:     MULLO_INT * T0.Z, PV.Z, T1.Z,
+; EG-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT:     ADD_INT T0.Z, PS, T2.W,
+; EG-NEXT:     MULLO_INT * T0.W, PV.W, T1.Y,
+; EG-NEXT:     ADD_INT * T0.W, PS, T2.Z,
+; EG-NEXT:    ALU clause starting at 58:
+; EG-NEXT:     ADD_INT * T3.W, T3.W, literal.x,
+; EG-NEXT:    -1(nan), 0(0.000000e+00)
+; EG-NEXT:     SETE_INT * T4.W, PV.W, 0.0,
+; EG-NEXT:     PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+;
+; CM-LABEL: mad24_known_bits_destroyed:
+; CM:       ; %bb.0: ; %bb
+; CM-NEXT:    ALU 22, @12, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    LOOP_START_DX10 @11
+; CM-NEXT:    ALU 23, @35, KC0[], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T2.X
+; CM-NEXT:    ALU 23, @59, KC0[], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
+; CM-NEXT:    ALU_PUSH_BEFORE 3, @83, KC0[], KC1[]
+; CM-NEXT:    JUMP @10 POP:1
+; CM-NEXT:    LOOP_BREAK @10
+; CM-NEXT:    POP @10 POP:1
+; CM-NEXT:    END_LOOP @2
+; CM-NEXT:    CF_END
+; CM-NEXT:    ALU clause starting at 12:
+; CM-NEXT:     MOV * T0.W, KC0[5].X,
+; CM-NEXT:     MOV * T0.Z, KC0[4].W,
+; CM-NEXT:     MOV * T0.Y, KC0[4].Z,
+; CM-NEXT:     MOV T0.X, KC0[2].Y,
+; CM-NEXT:     AND_INT * T1.Y, KC0[4].X, literal.x,
+; CM-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T1.Z, KC0[3].W, literal.x,
+; CM-NEXT:     AND_INT * T1.W, KC0[3].Z, literal.x,
+; CM-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T2.Y, KC0[6].Y, literal.x,
+; CM-NEXT:     MOV T2.Z, KC0[6].X,
+; CM-NEXT:     MOV * T2.W, KC0[7].Y,
+; CM-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT:     LSHR T1.X, PV.W, literal.x,
+; CM-NEXT:     MOV T3.Y, KC0[5].W,
+; CM-NEXT:     MOV T3.Z, KC0[5].Z,
+; CM-NEXT:     MOV * T2.W, KC0[7].X,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:     LSHR T2.X, PV.W, literal.x,
+; CM-NEXT:     MOV T4.Y, KC0[6].Z,
+; CM-NEXT:     MOV T4.Z, KC0[6].W,
+; CM-NEXT:     MOV * T2.W, KC0[4].Y,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT:    ALU clause starting at 35:
+; CM-NEXT:     MULLO_INT T0.X, T0.X, T2.Y,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.X, T2.Y,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T0.X, T2.Y,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.X, T2.Y,
+; CM-NEXT:     ADD_INT * T2.W, PV.X, T4.Y,
+; CM-NEXT:     AND_INT * T2.W, PV.W, literal.x,
+; CM-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T0.X, T2.W, T2.Y,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T2.W, T2.Y,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T2.W, T2.Y,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T2.W, T2.Y,
+; CM-NEXT:     MULLO_INT T0.X (MASKED), T0.W, T1.Y,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.W, T1.Y,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T0.W, T1.Y,
+; CM-NEXT:     MULLO_INT * T0.W, T0.W, T1.Y,
+; CM-NEXT:     MULLO_INT T0.X (MASKED), T0.Z, T1.Z,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.Z, T1.Z,
+; CM-NEXT:     MULLO_INT T0.Z, T0.Z, T1.Z,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.Z, T1.Z,
+; CM-NEXT:     MULLO_INT T0.X (MASKED), T0.Y, T1.W,
+; CM-NEXT:     MULLO_INT T0.Y, T0.Y, T1.W,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T0.Y, T1.W,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.Y, T1.W,
+; CM-NEXT:     ADD_INT * T0.X, T0.X, T4.Y,
+; CM-NEXT:    ALU clause starting at 59:
+; CM-NEXT:     ADD_INT * T2.W, T0.Y, T3.Z,
+; CM-NEXT:     ADD_INT T0.Z, T0.Z, T3.Y,
+; CM-NEXT:     AND_INT * T2.W, PV.W, literal.x,
+; CM-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T0.X (MASKED), T2.W, T1.W,
+; CM-NEXT:     MULLO_INT T0.Y, T2.W, T1.W,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T2.W, T1.W,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T2.W, T1.W,
+; CM-NEXT:     ADD_INT T0.Y, PV.Y, T3.Z,
+; CM-NEXT:     ADD_INT T5.Z, T0.W, T2.Z, BS:VEC_021/SCL_122
+; CM-NEXT:     AND_INT * T0.W, T0.Z, literal.x,
+; CM-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T0.X (MASKED), T0.W, T1.Z,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.W, T1.Z,
+; CM-NEXT:     MULLO_INT T0.Z, T0.W, T1.Z,
+; CM-NEXT:     MULLO_INT * T0.W (MASKED), T0.W, T1.Z,
+; CM-NEXT:     ADD_INT T0.Z, PV.Z, T3.Y,
+; CM-NEXT:     AND_INT * T0.W, T5.Z, literal.x,
+; CM-NEXT:    16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT:     MULLO_INT T0.X (MASKED), T0.W, T1.Y,
+; CM-NEXT:     MULLO_INT T0.Y (MASKED), T0.W, T1.Y,
+; CM-NEXT:     MULLO_INT T0.Z (MASKED), T0.W, T1.Y,
+; CM-NEXT:     MULLO_INT * T0.W, T0.W, T1.Y,
+; CM-NEXT:     ADD_INT * T0.W, PV.W, T2.Z,
+; CM-NEXT:    ALU clause starting at 83:
+; CM-NEXT:     ADD_INT * T4.Z, T4.Z, literal.x,
+; CM-NEXT:    -1(nan), 0(0.000000e+00)
+; CM-NEXT:     SETE_INT * T2.W, PV.Z, 0.0,
+; CM-NEXT:     PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+;
+; GCN-LABEL: mad24_known_bits_destroyed:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v5, v0
+; GCN-NEXT:    v_and_b32_e32 v0, 0xffffff, v13
+; GCN-NEXT:    v_and_b32_e32 v1, 0xffffff, v2
+; GCN-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
+; GCN-NEXT:    v_and_b32_e32 v3, 0xffffff, v4
+; GCN-NEXT:    s_mov_b64 s[8:9], 0
+; GCN-NEXT:    s_mov_b32 s6, 0
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s4, s6
+; GCN-NEXT:    s_mov_b32 s5, s6
+; GCN-NEXT:  .LBB9_1: ; %bb19
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    v_mad_u32_u24 v4, v5, v0, v14
+; GCN-NEXT:    s_waitcnt expcnt(0)
+; GCN-NEXT:    v_mad_u32_u24 v6, v6, v1, v10
+; GCN-NEXT:    v_mad_u32_u24 v7, v7, v2, v11
+; GCN-NEXT:    v_mad_u32_u24 v8, v8, v3, v12
+; GCN-NEXT:    v_add_i32_e32 v15, vcc, -1, v15
+; GCN-NEXT:    v_mad_u32_u24 v5, v4, v0, v14
+; GCN-NEXT:    v_mad_u32_u24 v6, v6, v1, v10
+; GCN-NEXT:    v_mad_u32_u24 v7, v7, v2, v11
+; GCN-NEXT:    v_mad_u32_u24 v8, v8, v3, v12
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
+; GCN-NEXT:    buffer_store_dword v5, v[16:17], s[4:7], 0 addr64
+; GCN-NEXT:    s_or_b64 s[8:9], vcc, s[8:9]
+; GCN-NEXT:    buffer_store_dwordx4 v[5:8], v[18:19], s[4:7], 0 addr64
+; GCN-NEXT:    s_andn2_b64 exec, exec, s[8:9]
+; GCN-NEXT:    s_cbranch_execnz .LBB9_1
+; GCN-NEXT:  ; %bb.2: ; %bb18
+; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: mad24_known_bits_destroyed:
+; GFX8:       ; %bb.0: ; %bb
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mov_b32_e32 v5, v0
+; GFX8-NEXT:    v_and_b32_e32 v0, 0xffffff, v13
+; GFX8-NEXT:    v_and_b32_e32 v1, 0xffffff, v2
+; GFX8-NEXT:    v_and_b32_e32 v2, 0xffffff, v3
+; GFX8-NEXT:    v_and_b32_e32 v3, 0xffffff, v4
+; GFX8-NEXT:    s_mov_b64 s[4:5], 0
+; GFX8-NEXT:  .LBB9_1: ; %bb19
+; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX8-NEXT:    v_add_u32_e32 v15, vcc, -1, v15
+; GFX8-NEXT:    v_mad_u32_u24 v4, v5, v0, v14
+; GFX8-NEXT:    v_mad_u32_u24 v6, v6, v1, v10
+; GFX8-NEXT:    v_mad_u32_u24 v7, v7, v2, v11
+; GFX8-NEXT:    v_mad_u32_u24 v8, v8, v3, v12
+; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v15
+; GFX8-NEXT:    v_mad_u32_u24 v5, v4, v0, v14
+; GFX8-NEXT:    v_mad_u32_u24 v6, v6, v1, v10
+; GFX8-NEXT:    v_mad_u32_u24 v7, v7, v2, v11
+; GFX8-NEXT:    v_mad_u32_u24 v8, v8, v3, v12
+; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX8-NEXT:    flat_store_dword v[16:17], v5
+; GFX8-NEXT:    flat_store_dwordx4 v[18:19], v[5:8]
+; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX8-NEXT:    s_cbranch_execnz .LBB9_1
+; GFX8-NEXT:  ; %bb.2: ; %bb18
+; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX8-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
 bb:
   %tmp = and i32 %arg4, 16777215
   %tmp9 = extractelement <4 x i32> %arg1, i64 1
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor-Invalid-Flags_V1.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor-Invalid-Flags_V1.ll
new file mode 100644
index 0000000..610ce4f
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor-Invalid-Flags_V1.ll
@@ -0,0 +1,18 @@
+; RUN: not opt -passes='print<dxil-root-signature>' %s -S -o - 2>&1 | FileCheck %s
+; On Version 1, the only valid flag is DataVolatile (2).
+target triple = "dxil-unknown-shadermodel6.0-compute"
+
+; The root descriptor in !5 carries flag value 4, so opt must fail with the diagnostic below and must not print the root signature dump.
+; CHECK: error: Invalid value for RootDescriptorFlag: 4
+; CHECK-NOT: Root Signature Definitions
+define void @main() #0 {
+entry:
+  ret void
+}
+attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
+
+
+!dx.rootsignatures = !{!2} ; list of function/root signature pairs
+!2 = !{ ptr @main, !3, i32 1 } ; function, root signature, version
+!3 = !{ !5 } ; list of root signature elements
+!5 = !{ !"RootCBV", i32 0, i32 1, i32 2, i32 4  } ; last operand is the flag value rejected above
diff --git a/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers-Invalid-Flag_V1.ll b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers-Invalid-Flag_V1.ll
new file mode 100644
index 0000000..76b60b8
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers-Invalid-Flag_V1.ll
@@ -0,0 +1,19 @@
+; RUN: not opt -passes='print<dxil-root-signature>' %s -S -o - 2>&1 | FileCheck %s
+; Root signature version 1 must reject a static sampler whose flags operand (the last field of !5) is 1.
+
+target triple = "dxil-unknown-shadermodel6.0-compute"
+
+; CHECK: error: Invalid value for Static Sampler Flag: 1
+; CHECK-NOT: Root Signature Definitions
+
+define void @main() #0 {
+entry:
+  ret void
+}
+attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
+
+
+!dx.rootsignatures = !{!2} ; list of function/root signature pairs
+!2 = !{ ptr @main, !3, i32 1 } ; function, root signature, version
+!3 = !{ !5 } ; list of root signature elements
+!5 = !{ !"StaticSampler", i32 4, i32 2, i32 3, i32 5, float 0x3FF6CCCCC0000000, i32 9, i32 3, i32 2, float -1.280000e+02, float 1.280000e+02, i32 42, i32 0, i32 0, i32 1 }
diff --git a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
index 02118fb..b503da4 100644
--- a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
@@ -72,7 +72,7 @@ define internal void @bar() {
 ; CHECK-NEXT:    [[OFFSET:%.*]] = ashr exact i64 [[TMP2]], 3
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr ptr, ptr addrspace(1) [[BEGIN]], i64 [[OFFSET]]
 ; CHECK-NEXT:    [[START:%.*]] = getelementptr inbounds ptr, ptr addrspace(1) [[TMP3]], i64 -1
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt ptr addrspace(1) [[START]], [[BEGIN]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp uge ptr addrspace(1) [[START]], [[BEGIN]]
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
 ; CHECK:       while.entry:
 ; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[START]], [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
index 1361d92..2e500d5 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
@@ -72,12 +72,12 @@
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 #
-# DEBUG-NEXT: G_ABDS (opcode 65): 1 type index, 0 imm indices
+# DEBUG-NEXT: G_ABDS (opcode [[G_ABDS:[0-9]+]]): 1 type index, 0 imm indices
 # DEBUG-NEXT:.. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT:.. imm index coverage check SKIPPED: user-defined predicate detected
 #
-# DEBUG-NEXT:G_ABDU (opcode 66): 1 type index, 0 imm indices
-# DEBUG-NEXT:.. opcode 66 is aliased to 65
+# DEBUG-NEXT:G_ABDU (opcode [[G_ABDU:[0-9]+]]): 1 type index, 0 imm indices
+# DEBUG-NEXT:.. opcode [[G_ABDU]] is aliased to [[G_ABDS]]
 # DEBUG-NEXT:.. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT:.. imm index coverage check SKIPPED: user-defined predicate detected
 #
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_predicated_io/predicated_io_generic.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_predicated_io/predicated_io_generic.ll
new file mode 100644
index 0000000..a3127e8
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_predicated_io/predicated_io_generic.ll
@@ -0,0 +1,36 @@
+; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_predicated_io %s -o - | FileCheck %s
+; The first run omits the extension and must fail with the error below; the second run enables it and verifies the emitted instructions.
+; CHECK-ERROR: LLVM ERROR: OpPredicated[Load/Store]INTEL
+; CHECK-ERROR-SAME: instructions require the following SPIR-V extension: SPV_INTEL_predicated_io
+
+; CHECK-DAG: Capability PredicatedIOINTEL
+; CHECK-DAG: Extension "SPV_INTEL_predicated_io"
+
+; CHECK-DAG: %[[Int32Ty:[0-9]+]] = OpTypeInt 32 0
+; CHECK-DAG: %[[IntPtrTy:[0-9]+]] = OpTypePointer CrossWorkgroup %[[Int32Ty]]
+; CHECK-DAG: %[[BoolTy:[0-9]+]] = OpTypeBool
+; CHECK-DAG: %[[VoidTy:[0-9]+]] = OpTypeVoid
+; CHECK: %[[LoadPtr:[0-9]+]] = OpFunctionParameter %[[IntPtrTy]]
+; CHECK: %[[StorePtr:[0-9]+]] = OpFunctionParameter %[[IntPtrTy]]
+; CHECK: %[[DefaultVal:[0-9]+]] = OpFunctionParameter %[[Int32Ty]]
+; CHECK: %[[StoreObj:[0-9]+]] = OpFunctionParameter %[[Int32Ty]]
+; CHECK: %[[Predicate:[0-9]+]] = OpFunctionParameter %[[BoolTy]]
+; CHECK: PredicatedLoadINTEL %[[Int32Ty]] %[[LoadPtr]] %[[Predicate]] %[[DefaultVal]]
+; CHECK: PredicatedLoadINTEL %[[Int32Ty]] %[[LoadPtr]] %[[Predicate]] %[[DefaultVal]] None
+; CHECK: PredicatedStoreINTEL %[[StorePtr]] %[[StoreObj]] %[[Predicate]]
+; CHECK: PredicatedStoreINTEL %[[StorePtr]] %[[StoreObj]] %[[Predicate]] None
+; Each builtin is called twice: once without and once with a trailing i32 0 operand, which is expected to be printed as 'None'.
+define spir_func void @foo(ptr addrspace(1) %load_pointer, ptr addrspace(1) %store_pointer, i32  %default_value, i32 %store_object, i1 zeroext %predicate) {
+entry:
+  %1 = call spir_func i32 @_Z27__spirv_PredicatedLoadINTELPU3AS1Kibi(ptr addrspace(1) %load_pointer, i1 %predicate, i32 %default_value)
+  %2 = call spir_func i32 @_Z27__spirv_PredicatedLoadINTELPU3AS1Kibii(ptr addrspace(1) %load_pointer, i1 %predicate, i32 %default_value, i32 0)
+  call spir_func void @_Z28__spirv_PredicatedStoreINTELPU3AS1Kiib(ptr addrspace(1) %store_pointer, i32 %store_object, i1 %predicate)
+  call spir_func void @_Z28__spirv_PredicatedStoreINTELPU3AS1Kiibi(ptr addrspace(1) %store_pointer, i32 %store_object, i1 %predicate, i32 0)
+  ret void
+}
+
+declare spir_func i32 @_Z27__spirv_PredicatedLoadINTELPU3AS1Kibi(ptr addrspace(1), i1, i32)
+declare spir_func i32 @_Z27__spirv_PredicatedLoadINTELPU3AS1Kibii(ptr addrspace(1), i1, i32, i32)
+declare spir_func void @_Z28__spirv_PredicatedStoreINTELPU3AS1Kiib(ptr addrspace(1), i32, i1)
+declare spir_func void @_Z28__spirv_PredicatedStoreINTELPU3AS1Kiibi(ptr addrspace(1), i32, i1, i32)
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index 52f57dc..a8d37be 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -434,7 +434,6 @@ entry:
 define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-LABEL: stest_f16i16:
 ; CHECK:         .functype stest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT:    .local v128, v128, v128
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 5
 ; CHECK-NEXT:    call __truncsfhf2
@@ -474,15 +473,6 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 3
-; CHECK-NEXT:    v128.const 32767, 32767, 32767, 32767
-; CHECK-NEXT:    local.tee 8
-; CHECK-NEXT:    i32x4.min_s
-; CHECK-NEXT:    v128.const -32768, -32768, -32768, -32768
-; CHECK-NEXT:    local.tee 9
-; CHECK-NEXT:    i32x4.max_s
-; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT:    local.tee 10
-; CHECK-NEXT:    v128.and
 ; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.splat
@@ -495,13 +485,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    local.get 7
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 3
-; CHECK-NEXT:    local.get 8
-; CHECK-NEXT:    i32x4.min_s
-; CHECK-NEXT:    local.get 9
-; CHECK-NEXT:    i32x4.max_s
-; CHECK-NEXT:    local.get 10
-; CHECK-NEXT:    v128.and
-; CHECK-NEXT:    i16x8.narrow_i32x4_u
+; CHECK-NEXT:    i16x8.narrow_i32x4_s
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptosi <8 x half> %x to <8 x i32>
@@ -516,7 +500,6 @@ entry:
 define <8 x i16> @utest_f16i16(<8 x half> %x) {
 ; CHECK-LABEL: utest_f16i16:
 ; CHECK:         .functype utest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT:    .local v128
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 5
 ; CHECK-NEXT:    call __truncsfhf2
@@ -556,9 +539,6 @@ define <8 x i16> @utest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 3
-; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT:    local.tee 8
-; CHECK-NEXT:    i32x4.min_u
 ; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.splat
@@ -571,8 +551,6 @@ define <8 x i16> @utest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    local.get 7
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 3
-; CHECK-NEXT:    local.get 8
-; CHECK-NEXT:    i32x4.min_u
 ; CHECK-NEXT:    i16x8.narrow_i32x4_u
 ; CHECK-NEXT:    # fallthrough-return
 entry:
@@ -1861,7 +1839,6 @@ entry:
 define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-LABEL: stest_f16i16_mm:
 ; CHECK:         .functype stest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT:    .local v128, v128, v128
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 5
 ; CHECK-NEXT:    call __truncsfhf2
@@ -1901,15 +1878,6 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 3
-; CHECK-NEXT:    v128.const 32767, 32767, 32767, 32767
-; CHECK-NEXT:    local.tee 8
-; CHECK-NEXT:    i32x4.min_s
-; CHECK-NEXT:    v128.const -32768, -32768, -32768, -32768
-; CHECK-NEXT:    local.tee 9
-; CHECK-NEXT:    i32x4.max_s
-; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT:    local.tee 10
-; CHECK-NEXT:    v128.and
 ; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.splat
@@ -1922,13 +1890,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    local.get 7
 ; CHECK-NEXT:    i32.trunc_sat_f32_s
 ; CHECK-NEXT:    i32x4.replace_lane 3
-; CHECK-NEXT:    local.get 8
-; CHECK-NEXT:    i32x4.min_s
-; CHECK-NEXT:    local.get 9
-; CHECK-NEXT:    i32x4.max_s
-; CHECK-NEXT:    local.get 10
-; CHECK-NEXT:    v128.and
-; CHECK-NEXT:    i16x8.narrow_i32x4_u
+; CHECK-NEXT:    i16x8.narrow_i32x4_s
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptosi <8 x half> %x to <8 x i32>
@@ -1941,7 +1903,6 @@ entry:
 define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
 ; CHECK-LABEL: utest_f16i16_mm:
 ; CHECK:         .functype utest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT:    .local v128
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 5
 ; CHECK-NEXT:    call __truncsfhf2
@@ -1981,9 +1942,6 @@ define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    call __extendhfsf2
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 3
-; CHECK-NEXT:    v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT:    local.tee 8
-; CHECK-NEXT:    i32x4.min_u
 ; CHECK-NEXT:    local.get 4
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.splat
@@ -1996,8 +1954,6 @@ define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    local.get 7
 ; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    i32x4.replace_lane 3
-; CHECK-NEXT:    local.get 8
-; CHECK-NEXT:    i32x4.min_u
 ; CHECK-NEXT:    i16x8.narrow_i32x4_u
 ; CHECK-NEXT:    # fallthrough-return
 entry:
diff --git a/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll b/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll
new file mode 100644
index 0000000..f3f3ba9
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) #2
+declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) #2
+
+define <16 x i8> @i16_signed(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: i16_signed:
+; CHECK:         .functype i16_signed (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0: # %bb2
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i8x16.narrow_i16x8_s
+; CHECK-NEXT:    # fallthrough-return
+bb2:
+  %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; concatenate %a and %b into one 16-lane vector
+  %1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %0, <16 x i16> splat (i16 -128)) ; clamp every lane from below at -128
+  %2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127)) ; clamp every lane from above at 127
+  %3 = trunc nsw <16 x i16> %2 to <16 x i8> ; clamped lanes fit in a signed i8, so nsw holds
+  ret <16 x i8> %3 ; single return; the whole clamp+trunc is expected to become one signed narrow
+}
+
+define <8 x i16> @i32_signed(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: i32_signed:
+; CHECK:         .functype i32_signed (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0: # %bb2
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i16x8.narrow_i32x4_s
+; CHECK-NEXT:    # fallthrough-return
+bb2:
+  %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; concatenate %a and %b into one 8-lane vector
+  %1 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> %0, <8 x i32> splat (i32 -32768)) ; clamp every lane from below at -32768
+  %2 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %1, <8 x i32> splat (i32 32767)) ; clamp every lane from above at 32767
+  %3 = trunc nsw <8 x i32> %2 to <8 x i16> ; clamped lanes fit in a signed i16
+  ret <8 x i16> %3 ; expected to be selected as a single signed narrow of the two inputs
+}
+
+define <8 x i16> @i32_signed_flipped(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: i32_signed_flipped:
+; CHECK:         .functype i32_signed_flipped (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0: # %bb2
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i16x8.narrow_i32x4_s
+; CHECK-NEXT:    # fallthrough-return
+bb2:
+  %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; concatenate %a and %b into one 8-lane vector
+  %1 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> splat (i32 32767), <8 x i32> %0) ; upper clamp first, constant written as the first operand
+  %2 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> splat (i32 -32768), <8 x i32> %1) ; lower clamp second, constant again first
+  %3 = trunc nsw <8 x i32> %2 to <8 x i16> ; clamped lanes fit in a signed i16
+  ret <8 x i16> %3 ; same expected narrow as i32_signed despite the swapped clamp and operand order
+}
+
+define <16 x i8> @i16_unsigned(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: i16_unsigned:
+; CHECK:         .functype i16_unsigned (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0: # %bb2
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i8x16.narrow_i16x8_u
+; CHECK-NEXT:    # fallthrough-return
+bb2:
+  %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; concatenate %a and %b into one 16-lane vector
+  %1 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %0, <16 x i16> splat (i16 255)) ; unsigned clamp of every lane at 255
+  %2 = trunc nuw <16 x i16> %1 to <16 x i8> ; clamped lanes fit in an unsigned i8, so nuw holds
+  ret <16 x i8> %2 ; expected to be selected as a single unsigned narrow of the two inputs
+}
+
+define <8 x i16> @i32_unsigned(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: i32_unsigned:
+; CHECK:         .functype i32_unsigned (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0: # %bb2
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i16x8.narrow_i32x4_u
+; CHECK-NEXT:    # fallthrough-return
+bb2:
+  %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; concatenate %a and %b into one 8-lane vector
+  %1 = tail call <8 x i32> @llvm.umin.v8i32(<8 x i32> %0, <8 x i32> splat (i32 65535)) ; unsigned clamp of every lane at 65535
+  %2 = trunc nuw <8 x i32> %1 to <8 x i16> ; nuw, not nsw: lanes in 32768..65535 are valid unsigned results but would be poison under nsw
+  ret <8 x i16> %2 ; expected to be selected as a single unsigned narrow of the two inputs
+}
diff --git a/llvm/test/CodeGen/X86/and-mask-variable.ll b/llvm/test/CodeGen/X86/and-mask-variable.ll
new file mode 100644
index 0000000..d89f0db
--- /dev/null
+++ b/llvm/test/CodeGen/X86/and-mask-variable.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86-NOBMI
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86-BMI2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X86-BMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64-NOBMI
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64-BMI2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2,+fast-bextr < %s | FileCheck %s --check-prefixes=X64-BMI2
+
+define i32 @mask_pair(i32 %x, i32 %y) nounwind {
+; X86-NOBMI-LABEL: mask_pair:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    shrl %cl, %eax
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI2-LABEL: mask_pair:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: mask_pair:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    shrl %cl, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI2-LABEL: mask_pair:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
+; X64-BMI2-NEXT:    retq
+  %shl = shl nsw i32 -1, %y ; all-ones value shifted left by the variable amount %y
+  %and = and i32 %shl, %x ; keep only the bits of %x selected by that mask
+  ret i32 %and ; every run configuration above expects a shift-right/shift-left pair on %x, with no explicit mask
+}
+
+define i64 @mask_pair_64(i64 %x, i64 %y) nounwind {
+; X86-NOBMI-LABEL: mask_pair_64:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl $-1, %edx
+; X86-NOBMI-NEXT:    movl $-1, %eax
+; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    je .LBB1_2
+; X86-NOBMI-NEXT:  # %bb.1:
+; X86-NOBMI-NEXT:    movl %eax, %edx
+; X86-NOBMI-NEXT:    xorl %eax, %eax
+; X86-NOBMI-NEXT:  .LBB1_2:
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI2-LABEL: mask_pair_64:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movl $-1, %edx
+; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
+; X86-BMI2-NEXT:    testb $32, %cl
+; X86-BMI2-NEXT:    je .LBB1_2
+; X86-BMI2-NEXT:  # %bb.1:
+; X86-BMI2-NEXT:    movl %eax, %edx
+; X86-BMI2-NEXT:    xorl %eax, %eax
+; X86-BMI2-NEXT:  .LBB1_2:
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    retl
+;
+; X64-NOBMI-LABEL: mask_pair_64:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    shrq %cl, %rax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI2-LABEL: mask_pair_64:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
+; X64-BMI2-NEXT:    retq
+  %shl = shl nsw i64 -1, %y ; all-ones value shifted left by the variable amount %y
+  %and = and i64 %shl, %x ; keep only the bits of %x selected by that mask
+  ret i64 %and ; x86_64 runs expect a shift pair; i686 runs expect the mask to be materialised in two halves and ANDed
+}
+
+define i128 @mask_pair_128(i128 %x, i128 %y) nounwind {
+; X86-NOBMI-LABEL: mask_pair_128:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %ebx
+; X86-NOBMI-NEXT:    pushl %edi
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    subl $32, %esp
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NOBMI-NEXT:    movl $0, (%esp)
+; X86-NOBMI-NEXT:    movl %ecx, %edx
+; X86-NOBMI-NEXT:    shrb $3, %dl
+; X86-NOBMI-NEXT:    andb $12, %dl
+; X86-NOBMI-NEXT:    negb %dl
+; X86-NOBMI-NEXT:    movsbl %dl, %ebx
+; X86-NOBMI-NEXT:    movl 24(%esp,%ebx), %edx
+; X86-NOBMI-NEXT:    movl 28(%esp,%ebx), %esi
+; X86-NOBMI-NEXT:    shldl %cl, %edx, %esi
+; X86-NOBMI-NEXT:    movl 16(%esp,%ebx), %edi
+; X86-NOBMI-NEXT:    movl 20(%esp,%ebx), %ebx
+; X86-NOBMI-NEXT:    shldl %cl, %ebx, %edx
+; X86-NOBMI-NEXT:    shldl %cl, %edi, %ebx
+; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI-NEXT:    shll %cl, %edi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %ebx
+; X86-NOBMI-NEXT:    movl %esi, 12(%eax)
+; X86-NOBMI-NEXT:    movl %edx, 8(%eax)
+; X86-NOBMI-NEXT:    movl %ebx, 4(%eax)
+; X86-NOBMI-NEXT:    movl %edi, (%eax)
+; X86-NOBMI-NEXT:    addl $32, %esp
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    popl %edi
+; X86-NOBMI-NEXT:    popl %ebx
+; X86-NOBMI-NEXT:    retl $4
+;
+; X86-BMI2-LABEL: mask_pair_128:
+; X86-BMI2:       # %bb.0:
+; X86-BMI2-NEXT:    pushl %ebx
+; X86-BMI2-NEXT:    pushl %edi
+; X86-BMI2-NEXT:    pushl %esi
+; X86-BMI2-NEXT:    subl $32, %esp
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $-1, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT:    movl $0, (%esp)
+; X86-BMI2-NEXT:    movl %ecx, %edx
+; X86-BMI2-NEXT:    shrb $3, %dl
+; X86-BMI2-NEXT:    andb $12, %dl
+; X86-BMI2-NEXT:    negb %dl
+; X86-BMI2-NEXT:    movsbl %dl, %edi
+; X86-BMI2-NEXT:    movl 24(%esp,%edi), %edx
+; X86-BMI2-NEXT:    movl 28(%esp,%edi), %esi
+; X86-BMI2-NEXT:    shldl %cl, %edx, %esi
+; X86-BMI2-NEXT:    movl 16(%esp,%edi), %ebx
+; X86-BMI2-NEXT:    movl 20(%esp,%edi), %edi
+; X86-BMI2-NEXT:    shldl %cl, %edi, %edx
+; X86-BMI2-NEXT:    shldl %cl, %ebx, %edi
+; X86-BMI2-NEXT:    shlxl %ecx, %ebx, %ecx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT:    movl %esi, 12(%eax)
+; X86-BMI2-NEXT:    movl %edx, 8(%eax)
+; X86-BMI2-NEXT:    movl %edi, 4(%eax)
+; X86-BMI2-NEXT:    movl %ecx, (%eax)
+; X86-BMI2-NEXT:    addl $32, %esp
+; X86-BMI2-NEXT:    popl %esi
+; X86-BMI2-NEXT:    popl %edi
+; X86-BMI2-NEXT:    popl %ebx
+; X86-BMI2-NEXT:    retl $4
+;
+; X64-NOBMI-LABEL: mask_pair_128:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rdx, %rcx
+; X64-NOBMI-NEXT:    movq $-1, %rdx
+; X64-NOBMI-NEXT:    movq $-1, %r8
+; X64-NOBMI-NEXT:    shlq %cl, %r8
+; X64-NOBMI-NEXT:    xorl %eax, %eax
+; X64-NOBMI-NEXT:    testb $64, %cl
+; X64-NOBMI-NEXT:    cmovneq %r8, %rdx
+; X64-NOBMI-NEXT:    cmoveq %r8, %rax
+; X64-NOBMI-NEXT:    andq %rdi, %rax
+; X64-NOBMI-NEXT:    andq %rsi, %rdx
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI2-LABEL: mask_pair_128:
+; X64-BMI2:       # %bb.0:
+; X64-BMI2-NEXT:    movq $-1, %rcx
+; X64-BMI2-NEXT:    shlxq %rdx, %rcx, %r8
+; X64-BMI2-NEXT:    xorl %eax, %eax
+; X64-BMI2-NEXT:    testb $64, %dl
+; X64-BMI2-NEXT:    cmovneq %r8, %rcx
+; X64-BMI2-NEXT:    cmoveq %r8, %rax
+; X64-BMI2-NEXT:    andq %rdi, %rax
+; X64-BMI2-NEXT:    andq %rsi, %rcx
+; X64-BMI2-NEXT:    movq %rcx, %rdx
+; X64-BMI2-NEXT:    retq
+  %shl = shl nsw i128 -1, %y ; all-ones value shifted left by the variable amount %y
+  %and = and i128 %shl, %x ; keep only the bits of %x selected by that mask
+  ret i128 %and ; no run configuration above uses the shift-pair form here; each one builds the shifted mask and ANDs it
+}
diff --git a/llvm/test/CodeGen/X86/ptrtoaddr-fast-isel.ll b/llvm/test/CodeGen/X86/ptrtoaddr-fast-isel.ll
new file mode 100644
index 0000000..c302d41
--- /dev/null
+++ b/llvm/test/CodeGen/X86/ptrtoaddr-fast-isel.ll
@@ -0,0 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -fast-isel -fast-isel-abort=1 < %s -o - | FileCheck %s
+
+define i64 @ptrtoaddr(ptr %p) {
+; CHECK-LABEL: ptrtoaddr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    retq
+  %addr = ptrtoaddr ptr %p to i64 ; the only non-return instruction; expected output is a plain register move
+  ret i64 %addr ; the RUN line passes -fast-isel-abort=1 alongside -fast-isel
+}
diff --git a/llvm/test/DebugInfo/X86/instr-ref-opt-bisect2.ll b/llvm/test/DebugInfo/X86/instr-ref-opt-bisect2.ll
new file mode 100644
index 0000000..92aedfe
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/instr-ref-opt-bisect2.ll
@@ -0,0 +1,36 @@
+; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=1 | FileCheck %s
+; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=10 | FileCheck %s
+; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=100 | FileCheck %s
+
+; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=1 -fast-isel=true | FileCheck %s
+; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=10 -fast-isel=true | FileCheck %s
+; RUN: llc %s -o - -stop-after=livedebugvalues -opt-bisect-limit=100 -fast-isel=true | FileCheck %s
+
+; This test has the same purpose as the instr-ref-opt-bisect.ll, to check if
+; during opt-bisect's optimisation level change we won't run into an assert.
+; This is simply testing different IR.
+
+; CHECK: DBG_VALUE
+
+target triple = "x86_64-pc-windows-msvc"
+
+define i1 @foo(i32 %arg) !dbg !3 {
+entry:
+    #dbg_value(i32 %arg, !4, !DIExpression(), !5) ; debug record describing %arg; the test only requires a DBG_VALUE in the output
+  switch i32 %arg, label %bb [
+    i32 810, label %bb
+  ], !dbg !5 ; both the default edge and the single case edge lead to %bb
+bb:
+  %a = load volatile i1, ptr null, align 1 ; volatile load whose result is unused
+  ret i1 false
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
+!1 = !DIFile(filename: "instr-ref-opt-bisect2.ll", directory: ".")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = distinct !DISubprogram(name: "instr-ref-opt-bisect2", file: !1, unit: !0)
+!4 = !DILocalVariable(name: "arg", arg: 1, scope: !3) ; %arg is the first and only parameter of @foo, so its 1-based index is 1
+!5 = !DILocation(line: 0, scope: !3)
diff --git a/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll b/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll
index 5f08552..0e382b2 100644
--- a/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll
+++ b/llvm/test/Instrumentation/AllocToken/extralibfuncs.ll
@@ -38,7 +38,7 @@ entry:
   ret ptr %ptr1
 }
 
-!0 = !{!"int"}
+!0 = !{!"int", i1 0}
 ;.
-; CHECK: [[META0]] = !{!"int"}
+; CHECK: [[META0]] = !{!"int", i1 false}
 ;.
diff --git a/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll b/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll
index e023ab6b..19673da 100644
--- a/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll
+++ b/llvm/test/Instrumentation/AllocToken/nonlibcalls.ll
@@ -79,7 +79,7 @@ entry:
   ret ptr %ptr1
 }
 
-!0 = !{!"int"}
+!0 = !{!"int", i1 0}
 ;.
-; CHECK: [[META0]] = !{!"int"}
+; CHECK: [[META0]] = !{!"int", i1 false}
 ;.
diff --git a/llvm/test/Instrumentation/AllocToken/remark.ll b/llvm/test/Instrumentation/AllocToken/remark.ll
index a2404526..f2eaa62 100644
--- a/llvm/test/Instrumentation/AllocToken/remark.ll
+++ b/llvm/test/Instrumentation/AllocToken/remark.ll
@@ -32,7 +32,7 @@ entry:
   ret ptr %ptr1
 }
 
-!0 = !{!"int"}
+!0 = !{!"int", i1 0}
 ;.
-; CHECK: [[META0]] = !{!"int"}
+; CHECK: [[META0]] = !{!"int", i1 false}
 ;.
diff --git a/llvm/test/Instrumentation/AllocToken/typehashpointersplit.ll b/llvm/test/Instrumentation/AllocToken/typehashpointersplit.ll
new file mode 100644
index 0000000..1f77648
--- /dev/null
+++ b/llvm/test/Instrumentation/AllocToken/typehashpointersplit.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=inferattrs,alloc-token -alloc-token-mode=typehashpointersplit -alloc-token-max=2 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+declare ptr @malloc(i64)
+
+define void @test_typehashpointersplit() sanitize_alloc_token {
+; CHECK-LABEL: define void @test_typehashpointersplit(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__alloc_token_malloc(i64 4, i64 0), !alloc_token [[META0:![0-9]+]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr @__alloc_token_malloc(i64 128, i64 0), !alloc_token [[META1:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call ptr @__alloc_token_malloc(i64 8, i64 1), !alloc_token [[META2:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call ptr @__alloc_token_malloc(i64 64, i64 1), !alloc_token [[META3:![0-9]+]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  call ptr @malloc(i64 4), !alloc_token !0 ; metadata flag is i1 0; expected token argument is 0
+  call ptr @malloc(i64 128), !alloc_token !1 ; metadata flag is i1 0; expected token argument is 0
+  call ptr @malloc(i64 8), !alloc_token !2 ; metadata flag is i1 1; expected token argument is 1
+  call ptr @malloc(i64 64), !alloc_token !3 ; same type name as !1 but flag i1 1; expected token argument is 1
+  ret void
+}
+
+!0 = !{!"int", i1 0}
+!1 = !{!"Foo", i1 0}
+!2 = !{!"int*", i1 1}
+!3 = !{!"Foo", i1 1}
+;.
+; CHECK: [[META0]] = !{!"int", i1 false}
+; CHECK: [[META1]] = !{!"Foo", i1 false}
+; CHECK: [[META2]] = !{!"int*", i1 true}
+; CHECK: [[META3]] = !{!"Foo", i1 true}
+;.
diff --git a/llvm/test/MC/AArch64/armv9a-sysp-diagnostics.s b/llvm/test/MC/AArch64/armv9a-sysp-diagnostics.s
new file mode 100644
index 0000000..f8baf37
--- /dev/null
+++ b/llvm/test/MC/AArch64/armv9a-sysp-diagnostics.s
@@ -0,0 +1,95 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ERROR
+
+tlbip ALLE1 // this and every tlbip line below is expected to produce the diagnostic on the line after it
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE1IS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE1ISNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE1NXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE1OS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE1OSNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE2
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE2IS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE2ISNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE2NXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE2OS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE2OSNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE3
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE3IS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE3ISNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE3NXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE3OS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ALLE3OSNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ASIDE1
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ASIDE1IS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ASIDE1ISNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ASIDE1NXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ASIDE1OS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip ASIDE1OSNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip PAALL
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip PAALLOS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip RPALOS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip RPAOS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLE1
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLE1IS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLE1ISNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLE1NXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLE1OS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLE1OSNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLS12E1
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLS12E1IS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLS12E1ISNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLS12E1NXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLS12E1OS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLS12E1OSNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLWS2E1
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLWS2E1IS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLWS2E1ISNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLWS2E1NXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLWS2E1OS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
+tlbip VMALLWS2E1OSNXS
+// CHECK-ERROR: error: invalid operand for TLBIP instruction
diff --git a/llvm/test/Other/new-pm-print-pipeline.ll b/llvm/test/Other/new-pm-print-pipeline.ll
index 6fa57f1..3536932 100644
--- a/llvm/test/Other/new-pm-print-pipeline.ll
+++ b/llvm/test/Other/new-pm-print-pipeline.ll
@@ -50,7 +50,7 @@
 ; CHECK-17: function(print<stack-lifetime><may>,print<stack-lifetime><must>)
 
 ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
-; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)
+; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;no-switch-to-arithmetic;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-arithmetic;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)
 
 ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only>,loop-vectorize<interleave-forced-only;vectorize-forced-only>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-19
 ; CHECK-19: function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,loop-vectorize<interleave-forced-only;vectorize-forced-only;>)
diff --git a/llvm/test/Transforms/GVN/ptrtoaddr.ll b/llvm/test/Transforms/GVN/ptrtoaddr.ll
new file mode 100644
index 0000000..6d02bc6
--- /dev/null
+++ b/llvm/test/Transforms/GVN/ptrtoaddr.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=gvn < %s | FileCheck %s
+
+define i64 @ptrtoaddr_same(ptr %p) {
+; CHECK-LABEL: define i64 @ptrtoaddr_same(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[J:%.*]] = ptrtoaddr ptr [[P]] to i64
+; CHECK-NEXT:    ret i64 0
+;
+  %i = ptrtoaddr ptr %p to i64
+  %j = ptrtoaddr ptr %p to i64 ; same operand and result type as %i
+  %sub = sub i64 %i, %j ; difference of two identical casts; the expected output returns constant 0
+  ret i64 %sub
+}
+
+; Note that unlike for ptrtoint, it's not possible for ptrtoaddr to differ
+; in result type for the same input.
+define i64 @ptrtoaddr_different(ptr %p, ptr %p2) {
+; CHECK-LABEL: define i64 @ptrtoaddr_different(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT:    [[I:%.*]] = ptrtoaddr ptr [[P]] to i64
+; CHECK-NEXT:    [[J:%.*]] = ptrtoaddr ptr [[P2]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[I]], [[J]]
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %i = ptrtoaddr ptr %p to i64
+  %j = ptrtoaddr ptr %p2 to i64 ; different pointer operand from %i
+  %sub = sub i64 %i, %j ; expected to be left untouched, since the two casts are not the same value
+  ret i64 %sub
+}
diff --git a/llvm/test/Transforms/InstCombine/fold-selective-shift.ll b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll
new file mode 100644
index 0000000..2b22965
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll
@@ -0,0 +1,323 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=instcombine %s -S | FileCheck %s
+
+declare void @clobber.i32(i32)
+
+define i16 @selective_shift_16(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT:    ret i16 [[SEL_V]]
+;
+  %upper.zext = zext i16 %upper to i32
+  %upper.shl = shl nuw i32 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i32
+  %pack = or disjoint i32 %upper.shl, %lower.zext
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+define i16 @selective_shift_16.commute(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.commute(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT:    ret i16 [[SEL_V]]
+;
+  %upper.zext = zext i16 %upper to i32
+  %upper.shl = shl nuw i32 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i32
+  %pack = or disjoint i32 %lower.zext, %upper.shl
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+define i16 @selective_shift_16.range(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.range(
+; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    ret i16 [[TRUNC]]
+;
+  %upper.shl = shl nuw i32 %upper, 16
+  %pack = or disjoint i32 %upper.shl, %lower
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+define i16 @selective_shift_16.range.commute(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.range.commute(
+; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    ret i16 [[TRUNC]]
+;
+  %upper.shl = shl nuw i32 %upper, 16
+  %pack = or disjoint i32 %lower, %upper.shl
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+define i32 @selective_shift_16.masked(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_16.masked(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT:    [[SEL:%.*]] = zext i16 [[SEL_V]] to i32
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+  %upper.zext = zext i16 %upper to i32
+  %upper.shl = shl nuw i32 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i32
+  %pack = or disjoint i32 %lower.zext, %upper.shl
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %sel.masked = and i32 %sel, 65535
+  ret i32 %sel.masked
+}
+
+define i32 @selective_shift_16.masked.commute(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_16.masked.commute(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT:    [[SEL:%.*]] = zext i16 [[SEL_V]] to i32
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+  %upper.zext = zext i16 %upper to i32
+  %upper.shl = shl nuw i32 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i32
+  %pack = or disjoint i32 %upper.shl, %lower.zext
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %sel.masked = and i32 %sel, 65535
+  ret i32 %sel.masked
+}
+
+define <2 x i16> @selective_shift.v16(<2 x i32> %mask, <2 x i16> %upper, <2 x i16> %lower) {
+; CHECK-LABEL: define <2 x i16> @selective_shift.v16(
+; CHECK-SAME: <2 x i32> [[MASK:%.*]], <2 x i16> [[UPPER:%.*]], <2 x i16> [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and <2 x i32> [[MASK]], splat (i32 16)
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq <2 x i32> [[MASK_BIT]], zeroinitializer
+; CHECK-NEXT:    [[SEL_V:%.*]] = select <2 x i1> [[MASK_BIT_Z]], <2 x i16> [[LOWER]], <2 x i16> [[UPPER]]
+; CHECK-NEXT:    ret <2 x i16> [[SEL_V]]
+;
+  %upper.zext = zext <2 x i16> %upper to <2 x i32>
+  %upper.shl = shl nuw <2 x i32> %upper.zext, splat(i32 16)
+  %lower.zext = zext <2 x i16> %lower to <2 x i32>
+  %pack = or disjoint <2 x i32> %upper.shl, %lower.zext
+  %mask.bit = and <2 x i32> %mask, splat(i32 16)
+  %sel = lshr <2 x i32> %pack, %mask.bit
+  %trunc = trunc <2 x i32> %sel to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+define i16 @selective_shift_16.wide(i64 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.wide(
+; CHECK-SAME: i64 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i64 [[MASK]], 16
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT:    ret i16 [[SEL_V]]
+;
+  %upper.zext = zext i16 %upper to i64
+  %upper.shl = shl nuw i64 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i64
+  %pack = or disjoint i64 %upper.shl, %lower.zext
+  %mask.bit = and i64 %mask, 16
+  %sel = lshr i64 %pack, %mask.bit
+  %trunc = trunc i64 %sel to i16
+  ret i16 %trunc
+}
+
+; The zext type (i24) is too narrow to hold both 16-bit halves, which blocks the fold.
+define i16 @selective_shift_16.narrow(i24 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.narrow(
+; CHECK-SAME: i24 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i24
+; CHECK-NEXT:    [[UPPER_SHL:%.*]] = shl i24 [[UPPER_ZEXT]], 16
+; CHECK-NEXT:    [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i24
+; CHECK-NEXT:    [[PACK:%.*]] = or disjoint i24 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i24 [[MASK]], 16
+; CHECK-NEXT:    [[SEL:%.*]] = lshr i24 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i24 [[SEL]] to i16
+; CHECK-NEXT:    ret i16 [[TRUNC]]
+;
+  %upper.zext = zext i16 %upper to i24
+  %upper.shl = shl i24 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i24
+  %pack = or disjoint i24 %upper.shl, %lower.zext
+  %mask.bit = and i24 %mask, 16
+  %sel = lshr i24 %pack, %mask.bit
+  %trunc = trunc i24 %sel to i16
+  ret i16 %trunc
+}
+
+; %lower has no range attribute, so its upper 16 bits may be set; this blocks the fold.
+define i16 @selective_shift_16_norange(i32 %mask, i32 %upper, i32 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16_norange(
+; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER]], 16
+; CHECK-NEXT:    [[PACK:%.*]] = or i32 [[UPPER_SHL]], [[LOWER]]
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    ret i16 [[TRUNC]]
+;
+  %upper.shl = shl nuw i32 %upper, 16
+  %pack = or i32 %upper.shl, %lower
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+define i16 @selective_shift_16.mu.0(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.0(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT:    call void @clobber.i32(i32 [[UPPER_ZEXT]])
+; CHECK-NEXT:    [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT:    call void @clobber.i32(i32 [[LOWER_ZEXT]])
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[TRUNC:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT:    ret i16 [[TRUNC]]
+;
+  %upper.zext = zext i16 %upper to i32
+  call void @clobber.i32(i32 %upper.zext)
+  %upper.shl = shl nuw i32 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i32
+  call void @clobber.i32(i32 %lower.zext)
+  %pack = or disjoint i32 %upper.shl, %lower.zext
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+; multi-use of %pack blocks fold
+define i16 @selective_shift_16.mu.1(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.1(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT:    [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16
+; CHECK-NEXT:    [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT:    [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT:    call void @clobber.i32(i32 [[PACK]])
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    ret i16 [[TRUNC]]
+;
+  %upper.zext = zext i16 %upper to i32
+  %upper.shl = shl nuw i32 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i32
+  %pack = or disjoint i32 %upper.shl, %lower.zext
+  call void @clobber.i32(i32 %pack)
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+; non-truncated use of %sel blocks fold
+define i16 @selective_shift_16.mu.2(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.2(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT:    [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16
+; CHECK-NEXT:    [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT:    [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT:    call void @clobber.i32(i32 [[SEL]])
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    ret i16 [[TRUNC]]
+;
+  %upper.zext = zext i16 %upper to i32
+  %upper.shl = shl nuw i32 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i32
+  %pack = or disjoint i32 %upper.shl, %lower.zext
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  call void @clobber.i32(i32 %sel)
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+; The half bitwidth (24) is not a power of 2, which blocks the fold.
+define i24 @selective_shift_24(i48 %mask, i24 %upper, i24 %lower) {
+; CHECK-LABEL: define i24 @selective_shift_24(
+; CHECK-SAME: i48 [[MASK:%.*]], i24 [[UPPER:%.*]], i24 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[UPPER_ZEXT:%.*]] = zext i24 [[UPPER]] to i48
+; CHECK-NEXT:    [[UPPER_SHL:%.*]] = shl nuw i48 [[UPPER_ZEXT]], 24
+; CHECK-NEXT:    [[LOWER_ZEXT:%.*]] = zext i24 [[LOWER]] to i48
+; CHECK-NEXT:    [[PACK:%.*]] = or disjoint i48 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i48 [[MASK]], 24
+; CHECK-NEXT:    [[SEL:%.*]] = lshr i48 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i48 [[SEL]] to i24
+; CHECK-NEXT:    ret i24 [[TRUNC]]
+;
+  %upper.zext = zext i24 %upper to i48
+  %upper.shl = shl nuw i48 %upper.zext, 24
+  %lower.zext = zext i24 %lower to i48
+  %pack = or disjoint i48 %upper.shl, %lower.zext
+  %mask.bit = and i48 %mask, 24
+  %sel = lshr i48 %pack, %mask.bit
+  %trunc = trunc i48 %sel to i24
+  ret i24 %trunc
+}
+
+define i32 @selective_shift_32(i64 %mask, i32 %upper, i32 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_32(
+; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i64 [[MASK]], 32
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT:    ret i32 [[SEL_V]]
+;
+  %upper.zext = zext i32 %upper to i64
+  %upper.shl = shl nuw i64 %upper.zext, 32
+  %lower.zext = zext i32 %lower to i64
+  %pack = or disjoint i64 %upper.shl, %lower.zext
+  %mask.bit = and i64 %mask, 32
+  %sel = lshr i64 %pack, %mask.bit
+  %trunc = trunc i64 %sel to i32
+  ret i32 %trunc
+}
+
+define i32 @selective_shift_32.commute(i64 %mask, i32 %upper, i32 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_32.commute(
+; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i64 [[MASK]], 32
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT:    ret i32 [[SEL_V]]
+;
+  %upper.zext = zext i32 %upper to i64
+  %upper.shl = shl nuw i64 %upper.zext, 32
+  %lower.zext = zext i32 %lower to i64
+  %pack = or disjoint i64 %lower.zext, %upper.shl
+  %mask.bit = and i64 %mask, 32
+  %sel = lshr i64 %pack, %mask.bit
+  %trunc = trunc i64 %sel to i32
+  ret i32 %trunc
+}
diff --git a/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll b/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll
deleted file mode 100644
index 72bc543..0000000
--- a/llvm/test/Transforms/LoopRotate/multiple-deopt-exits.ll
+++ /dev/null
@@ -1,164 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S < %s -passes='loop(loop-rotate)' -loop-rotate-multi=true | FileCheck %s
-
-; Test loop rotation with multiple exits, some of them - deoptimizing.
-; We should end up with a latch which exit is non-deoptimizing, so we should rotate
-; more than once.
-
-declare i32 @llvm.experimental.deoptimize.i32(...)
-
-define i32 @test_cond_with_one_deopt_exit(ptr nonnull %a, i64 %x) {
-; Rotation done twice.
-; Latch should be at the 2nd condition (for.cond2), exiting to %return.
-;
-; CHECK-LABEL: @test_cond_with_one_deopt_exit(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VAL_A_IDX3:%.*]] = load i32, ptr %a, align 4
-; CHECK-NEXT:    [[ZERO_CHECK4:%.*]] = icmp eq i32 [[VAL_A_IDX3]], 0
-; CHECK-NEXT:    br i1 [[ZERO_CHECK4]], label %deopt.exit, label %for.cond2.lr.ph
-; CHECK:       for.cond2.lr.ph:
-; CHECK-NEXT:    [[FOR_CHECK8:%.*]] = icmp ult i64 0, %x
-; CHECK-NEXT:    br i1 [[FOR_CHECK8]], label %for.body.lr.ph, label %return
-; CHECK:       for.body.lr.ph:
-; CHECK-NEXT:    br label %for.body
-; CHECK:       for.cond2:
-; CHECK:         [[FOR_CHECK:%.*]] = icmp ult i64 {{%.*}}, %x
-; CHECK-NEXT:    br i1 [[FOR_CHECK]], label %for.body, label %for.cond2.return_crit_edge
-; CHECK:       for.body:
-; CHECK:         br label %for.tail
-; CHECK:       for.tail:
-; CHECK:         [[VAL_A_IDX:%.*]] = load i32, ptr
-; CHECK-NEXT:    [[ZERO_CHECK:%.*]] = icmp eq i32 [[VAL_A_IDX]], 0
-; CHECK-NEXT:    br i1 [[ZERO_CHECK]], label %for.cond1.deopt.exit_crit_edge, label %for.cond2
-; CHECK:       for.cond2.return_crit_edge:
-; CHECK-NEXT:    {{%.*}} = phi i32
-; CHECK-NEXT:    br label %return
-; CHECK:       return:
-; CHECK-NEXT:    [[SUM_LCSSA2:%.*]] = phi i32
-; CHECK-NEXT:    ret i32 [[SUM_LCSSA2]]
-; CHECK:       for.cond1.deopt.exit_crit_edge:
-; CHECK-NEXT:    {{%.*}} = phi i32
-; CHECK-NEXT:    br label %deopt.exit
-; CHECK:       deopt.exit:
-; CHECK:         [[DEOPT_VAL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 {{%.*}}) ]
-; CHECK-NEXT:    ret i32 [[DEOPT_VAL]]
-;
-entry:
-  br label %for.cond1
-
-for.cond1:
-  %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.tail ]
-  %sum = phi i32 [ 0, %entry ], [ %sum.next, %for.tail ]
-  %a.idx = getelementptr inbounds i32, ptr %a, i64 %idx
-  %val.a.idx = load i32, ptr %a.idx, align 4
-  %zero.check = icmp eq i32 %val.a.idx, 0
-  br i1 %zero.check, label %deopt.exit, label %for.cond2
-
-for.cond2:
-  %for.check = icmp ult i64 %idx, %x
-  br i1 %for.check, label %for.body, label %return
-
-for.body:
-  br label %for.tail
-
-for.tail:
-  %sum.next = add i32 %sum, %val.a.idx
-  %idx.next = add nuw nsw i64 %idx, 1
-  br label %for.cond1
-
-return:
-  ret i32 %sum
-
-deopt.exit:
-  %deopt.val = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %val.a.idx) ]
-  ret i32 %deopt.val
-}
-
-define i32 @test_cond_with_two_deopt_exits(ptr nonnull %a, i64 %x) {
-; Rotation done three times.
-; Latch should be at the 3rd condition (for.cond3), exiting to %return.
-;
-; CHECK-LABEL: @test_cond_with_two_deopt_exits(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A_IDX_DEREF4:%.*]] = load ptr, ptr %a
-; CHECK-NEXT:    [[NULL_CHECK5:%.*]] = icmp eq ptr [[A_IDX_DEREF4]], null
-; CHECK-NEXT:    br i1 [[NULL_CHECK5]], label %deopt.exit1, label %for.cond2.lr.ph
-; CHECK:       for.cond2.lr.ph:
-; CHECK-NEXT:    [[VAL_A_IDX9:%.*]] = load i32, ptr [[A_IDX_DEREF4]], align 4
-; CHECK-NEXT:    [[ZERO_CHECK10:%.*]] = icmp eq i32 [[VAL_A_IDX9]], 0
-; CHECK-NEXT:    br i1 [[ZERO_CHECK10]], label %deopt.exit2, label %for.cond3.lr.ph
-; CHECK:       for.cond3.lr.ph:
-; CHECK-NEXT:    [[FOR_CHECK14:%.*]] = icmp ult i64 0, %x
-; CHECK-NEXT:    br i1 [[FOR_CHECK14]], label %for.body.lr.ph, label %return
-; CHECK:       for.body.lr.ph:
-; CHECK-NEXT:    br label %for.body
-; CHECK:       for.cond2:
-; CHECK:         [[VAL_A_IDX:%.*]] = load i32, ptr
-; CHECK-NEXT:    [[ZERO_CHECK:%.*]] = icmp eq i32 [[VAL_A_IDX]], 0
-; CHECK-NEXT:    br i1 [[ZERO_CHECK]], label %for.cond2.deopt.exit2_crit_edge, label %for.cond3
-; CHECK:       for.cond3:
-; CHECK:         [[FOR_CHECK:%.*]] = icmp ult i64 {{%.*}}, %x
-; CHECK-NEXT:    br i1 [[FOR_CHECK]], label %for.body, label %for.cond3.return_crit_edge
-; CHECK:       for.body:
-; CHECK:         br label %for.tail
-; CHECK:       for.tail:
-; CHECK:         [[IDX_NEXT:%.*]] = add nuw nsw i64 {{%.*}}, 1
-; CHECK:         [[NULL_CHECK:%.*]] = icmp eq ptr {{%.*}}, null
-; CHECK-NEXT:    br i1 [[NULL_CHECK]], label %for.cond1.deopt.exit1_crit_edge, label %for.cond2
-; CHECK:       for.cond3.return_crit_edge:
-; CHECK-NEXT:    [[SPLIT18:%.*]] = phi i32
-; CHECK-NEXT:    br label %return
-; CHECK:       return:
-; CHECK-NEXT:    [[SUM_LCSSA2:%.*]] = phi i32
-; CHECK-NEXT:    ret i32 [[SUM_LCSSA2]]
-; CHECK:       for.cond1.deopt.exit1_crit_edge:
-; CHECK-NEXT:    br label %deopt.exit1
-; CHECK:       deopt.exit1:
-; CHECK-NEXT:    [[DEOPT_VAL1:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0) ]
-; CHECK-NEXT:    ret i32 [[DEOPT_VAL1]]
-; CHECK:       for.cond2.deopt.exit2_crit_edge:
-; CHECK-NEXT:    [[SPLIT:%.*]] = phi i32
-; CHECK-NEXT:    br label %deopt.exit2
-; CHECK:       deopt.exit2:
-; CHECK-NEXT:    [[VAL_A_IDX_LCSSA:%.*]] = phi i32
-; CHECK-NEXT:    [[DEOPT_VAL2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[VAL_A_IDX_LCSSA]]) ]
-; CHECK-NEXT:    ret i32 [[DEOPT_VAL2]]
-;
-entry:
-  br label %for.cond1
-
-for.cond1:
-  %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.tail ]
-  %sum = phi i32 [ 0, %entry ], [ %sum.next, %for.tail ]
-  %a.idx = getelementptr inbounds ptr, ptr %a, i64 %idx
-  %a.idx.deref = load ptr, ptr %a.idx
-  %null.check = icmp eq ptr %a.idx.deref, null
-  br i1 %null.check, label %deopt.exit1, label %for.cond2
-
-for.cond2:
-  %val.a.idx = load i32, ptr %a.idx.deref, align 4
-  %zero.check = icmp eq i32 %val.a.idx, 0
-  br i1 %zero.check, label %deopt.exit2, label %for.cond3
-
-for.cond3:
-  %for.check = icmp ult i64 %idx, %x
-  br i1 %for.check, label %for.body, label %return
-
-for.body:
-  br label %for.tail
-
-for.tail:
-  %sum.next = add i32 %sum, %val.a.idx
-  %idx.next = add nuw nsw i64 %idx, 1
-  br label %for.cond1
-
-return:
-  ret i32 %sum
-
-deopt.exit1:
-  %deopt.val1 = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0) ]
-  ret i32 %deopt.val1
-deopt.exit2:
-  %deopt.val2 = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %val.a.idx) ]
-  ret i32 %deopt.val2
-}
diff --git a/llvm/test/Transforms/LoopRotate/multiple-exits.ll b/llvm/test/Transforms/LoopRotate/multiple-exits.ll
deleted file mode 100644
index 748700c..0000000
--- a/llvm/test/Transforms/LoopRotate/multiple-exits.ll
+++ /dev/null
@@ -1,236 +0,0 @@
-; RUN: opt -S -passes=loop-rotate < %s -verify-loop-info -verify-dom-info -verify-memoryssa | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.8.0"
-
-; PR7447
-define i32 @test1(ptr nocapture %a) nounwind readonly {
-entry:
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.cond1, %entry
-  %sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.cond1 ]
-  %i.0 = phi i1 [ true, %entry ], [ false, %for.cond1 ]
-  br i1 %i.0, label %for.cond1, label %return
-
-for.cond1:                                        ; preds = %for.cond, %land.rhs
-  %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.0, %for.cond ]
-  %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond ]
-  %cmp2 = icmp ult i32 %i.1, 100
-  br i1 %cmp2, label %land.rhs, label %for.cond
-
-land.rhs:                                         ; preds = %for.cond1
-  %conv = zext i32 %i.1 to i64
-  %arrayidx = getelementptr inbounds [100 x i32], ptr %a, i64 0, i64 %conv
-  %0 = load i32, ptr %arrayidx, align 4
-  %add = add i32 %0, %sum.1
-  %cmp4 = icmp ugt i32 %add, 1000
-  %inc = add i32 %i.1, 1
-  br i1 %cmp4, label %return, label %for.cond1
-
-return:                                           ; preds = %for.cond, %land.rhs
-  %retval.0 = phi i32 [ 1000, %land.rhs ], [ %sum.0, %for.cond ]
-  ret i32 %retval.0
-
-; CHECK-LABEL: @test1(
-; CHECK: for.cond1.preheader:
-; CHECK: %sum.04 = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.cond.loopexit ]
-; CHECK: br label %for.cond1
-
-; CHECK: for.cond1:
-; CHECK: %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.04, %for.cond1.preheader ]
-; CHECK: %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond1.preheader ]
-; CHECK: %cmp2 = icmp ult i32 %i.1, 100
-; CHECK: br i1 %cmp2, label %land.rhs, label %for.cond.loopexit
-}
-
-define void @test2(i32 %x) nounwind {
-entry:
-  br label %for.cond
-
-for.cond:                                         ; preds = %if.end, %entry
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ]
-  %cmp = icmp eq i32 %i.0, %x
-  br i1 %cmp, label %return.loopexit, label %for.body
-
-for.body:                                         ; preds = %for.cond
-  %call = tail call i32 @foo(i32 %i.0) nounwind
-  %tobool = icmp eq i32 %call, 0
-  br i1 %tobool, label %if.end, label %a
-
-if.end:                                           ; preds = %for.body
-  %call1 = tail call i32 @foo(i32 42) nounwind
-  %inc = add i32 %i.0, 1
-  br label %for.cond
-
-a:                                                ; preds = %for.body
-  %call2 = tail call i32 @bar(i32 1) nounwind
-  br label %return
-
-return.loopexit:                                  ; preds = %for.cond
-  br label %return
-
-return:                                           ; preds = %return.loopexit, %a
-  ret void
-
-; CHECK-LABEL: @test2(
-; CHECK: if.end:
-; CHECK: %inc = add i32 %i.02, 1
-; CHECK: %cmp = icmp eq i32 %inc, %x
-; CHECK: br i1 %cmp, label %for.cond.return.loopexit_crit_edge, label %for.body
-}
-
-declare i32 @foo(i32)
-
-declare i32 @bar(i32)
-
-@_ZTIi = external constant ptr
-
-; Verify dominators.
-define void @test3(i32 %x) personality ptr @__gxx_personality_v0 {
-entry:
-  %cmp2 = icmp eq i32 0, %x
-  br i1 %cmp2, label %try.cont.loopexit, label %for.body.lr.ph
-
-for.body.lr.ph:                                   ; preds = %entry
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.lr.ph, %for.inc
-  %i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
-  invoke void @_Z3fooi(i32 %i.03)
-          to label %for.inc unwind label %lpad
-
-for.inc:                                          ; preds = %for.body
-  %inc = add i32 %i.03, 1
-  %cmp = icmp eq i32 %inc, %x
-  br i1 %cmp, label %for.cond.try.cont.loopexit_crit_edge, label %for.body
-
-lpad:                                             ; preds = %for.body
-  %0 = landingpad { ptr, i32 }
-          catch ptr @_ZTIi
-  %1 = extractvalue { ptr, i32 } %0, 0
-  %2 = extractvalue { ptr, i32 } %0, 1
-  %3 = tail call i32 @llvm.eh.typeid.for(ptr @_ZTIi) nounwind
-  %matches = icmp eq i32 %2, %3
-  br i1 %matches, label %catch, label %eh.resume
-
-catch:                                            ; preds = %lpad
-  %4 = tail call ptr @__cxa_begin_catch(ptr %1) nounwind
-  br i1 true, label %invoke.cont2.loopexit, label %for.body.i.lr.ph
-
-for.body.i.lr.ph:                                 ; preds = %catch
-  br label %for.body.i
-
-for.body.i:                                       ; preds = %for.body.i.lr.ph, %for.inc.i
-  %i.0.i1 = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc.i, %for.inc.i ]
-  invoke void @_Z3fooi(i32 %i.0.i1)
-          to label %for.inc.i unwind label %lpad.i
-
-for.inc.i:                                        ; preds = %for.body.i
-  %inc.i = add i32 %i.0.i1, 1
-  %cmp.i = icmp eq i32 %inc.i, 0
-  br i1 %cmp.i, label %for.cond.i.invoke.cont2.loopexit_crit_edge, label %for.body.i
-
-lpad.i:                                           ; preds = %for.body.i
-  %5 = landingpad { ptr, i32 }
-          catch ptr @_ZTIi
-  %6 = extractvalue { ptr, i32 } %5, 0
-  %7 = extractvalue { ptr, i32 } %5, 1
-  %matches.i = icmp eq i32 %7, %3
-  br i1 %matches.i, label %catch.i, label %lpad1.body
-
-catch.i:                                          ; preds = %lpad.i
-  %8 = tail call ptr @__cxa_begin_catch(ptr %6) nounwind
-  invoke void @test3(i32 0)
-          to label %invoke.cont2.i unwind label %lpad1.i
-
-invoke.cont2.i:                                   ; preds = %catch.i
-  tail call void @__cxa_end_catch() nounwind
-  br label %invoke.cont2
-
-lpad1.i:                                          ; preds = %catch.i
-  %9 = landingpad { ptr, i32 }
-          cleanup
-  %10 = extractvalue { ptr, i32 } %9, 0
-  %11 = extractvalue { ptr, i32 } %9, 1
-  tail call void @__cxa_end_catch() nounwind
-  br label %lpad1.body
-
-for.cond.i.invoke.cont2.loopexit_crit_edge:       ; preds = %for.inc.i
-  br label %invoke.cont2.loopexit
-
-invoke.cont2.loopexit:                            ; preds = %for.cond.i.invoke.cont2.loopexit_crit_edge, %catch
-  br label %invoke.cont2
-
-invoke.cont2:                                     ; preds = %invoke.cont2.loopexit, %invoke.cont2.i
-  tail call void @__cxa_end_catch() nounwind
-  br label %try.cont
-
-for.cond.try.cont.loopexit_crit_edge:             ; preds = %for.inc
-  br label %try.cont.loopexit
-
-try.cont.loopexit:                                ; preds = %for.cond.try.cont.loopexit_crit_edge, %entry
-  br label %try.cont
-
-try.cont:                                         ; preds = %try.cont.loopexit, %invoke.cont2
-  ret void
-
-lpad1.body:                                       ; preds = %lpad1.i, %lpad.i
-  %exn.slot.0.i = phi ptr [ %10, %lpad1.i ], [ %6, %lpad.i ]
-  %ehselector.slot.0.i = phi i32 [ %11, %lpad1.i ], [ %7, %lpad.i ]
-  tail call void @__cxa_end_catch() nounwind
-  br label %eh.resume
-
-eh.resume:                                        ; preds = %lpad1.body, %lpad
-  %exn.slot.0 = phi ptr [ %exn.slot.0.i, %lpad1.body ], [ %1, %lpad ]
-  %ehselector.slot.0 = phi i32 [ %ehselector.slot.0.i, %lpad1.body ], [ %2, %lpad ]
-  %lpad.val = insertvalue { ptr, i32 } undef, ptr %exn.slot.0, 0
-  %lpad.val5 = insertvalue { ptr, i32 } %lpad.val, i32 %ehselector.slot.0, 1
-  resume { ptr, i32 } %lpad.val5
-}
-
-declare void @_Z3fooi(i32)
-
-declare i32 @__gxx_personality_v0(...)
-
-declare i32 @llvm.eh.typeid.for(ptr) nounwind readnone
-
-declare ptr @__cxa_begin_catch(ptr)
-
-declare void @__cxa_end_catch()
-
-define void @test4(i1 %arg) nounwind uwtable {
-entry:
-  br label %"7"
-
-"3":                                              ; preds = %"7"
-  br i1 %arg, label %"31", label %"4"
-
-"4":                                              ; preds = %"3"
-  %. = select i1 undef, float 0x3F50624DE0000000, float undef
-  %0 = add i32 %1, 1
-  br label %"7"
-
-"7":                                              ; preds = %"4", %entry
-  %1 = phi i32 [ %0, %"4" ], [ 0, %entry ]
-  %2 = icmp slt i32 %1, 100
-  br i1 %2, label %"3", label %"8"
-
-"8":                                              ; preds = %"7"
-  br i1 %arg, label %"9", label %"31"
-
-"9":                                              ; preds = %"8"
-  br label %"33"
-
-"27":                                             ; preds = %"31"
-  unreachable
-
-"31":                                             ; preds = %"8", %"3"
-  br i1 %arg, label %"27", label %"32"
-
-"32":                                             ; preds = %"31"
-  br label %"33"
-
-"33":                                             ; preds = %"32", %"9"
-  ret void
-}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
index f5329cf..c225ede5 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll
@@ -580,6 +580,201 @@ exit:
   ret double %accum
 }
 
+define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %src, ptr noalias %src.2, ptr noalias %dst) #0 {
+; I64-LABEL: define void @loaded_address_used_by_load_through_blend(
+; I64-SAME: i64 [[START:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] {
+; I64-NEXT:  [[ENTRY:.*]]:
+; I64-NEXT:    br label %[[LOOP_HEADER:.*]]
+; I64:       [[LOOP_HEADER]]:
+; I64-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; I64-NEXT:    [[IV_2:%.*]] = phi i64 [ [[START]], %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP_LATCH]] ]
+; I64-NEXT:    [[IV_1:%.*]] = add i64 [[IV]], 1
+; I64-NEXT:    [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_1]]
+; I64-NEXT:    [[L_SRC:%.*]] = load float, ptr [[GEP_SRC]], align 4
+; I64-NEXT:    [[C:%.*]] = fcmp oeq float [[L_SRC]], 0.000000e+00
+; I64-NEXT:    br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
+; I64:       [[THEN]]:
+; I64-NEXT:    [[IV_MUL:%.*]] = mul i64 [[IV_1]], [[START]]
+; I64-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[IV_MUL]]
+; I64-NEXT:    br label %[[LOOP_LATCH]]
+; I64:       [[LOOP_LATCH]]:
+; I64-NEXT:    [[MERGE_GEP:%.*]] = phi ptr [ [[GEP_SRC_2]], %[[THEN]] ], [ [[SRC_2]], %[[LOOP_HEADER]] ]
+; I64-NEXT:    [[L_2:%.*]] = load float, ptr [[MERGE_GEP]], align 4
+; I64-NEXT:    [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]]
+; I64-NEXT:    store float [[L_2]], ptr [[GEP_DST]], align 4
+; I64-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; I64-NEXT:    [[IV_2_NEXT]] = add i64 [[IV_2]], -1
+; I64-NEXT:    [[EC:%.*]] = icmp sgt i64 [[IV_2]], 100
+; I64-NEXT:    br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT:.*]]
+; I64:       [[EXIT]]:
+; I64-NEXT:    ret void
+;
+; I32-LABEL: define void @loaded_address_used_by_load_through_blend(
+; I32-SAME: i64 [[START:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[SRC_2:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0]] {
+; I32-NEXT:  [[ENTRY:.*:]]
+; I32-NEXT:    [[TMP0:%.*]] = add i64 [[START]], 1
+; I32-NEXT:    [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[START]], i64 100)
+; I32-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], [[SMIN]]
+; I32-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 8
+; I32-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; I32:       [[VECTOR_PH]]:
+; I32-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 8
+; I32-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
+; I32-NEXT:    [[TMP2:%.*]] = sub i64 [[START]], [[N_VEC]]
+; I32-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[START]], i64 0
+; I32-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+; I32-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x ptr> poison, ptr [[SRC_2]], i64 0
+; I32-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT1]], <8 x ptr> poison, <8 x i32> zeroinitializer
+; I32-NEXT:    br label %[[VECTOR_BODY:.*]]
+; I32:       [[VECTOR_BODY]]:
+; I32-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; I32-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; I32-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 1
+; I32-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 2
+; I32-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 3
+; I32-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 4
+; I32-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 5
+; I32-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 6
+; I32-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 7
+; I32-NEXT:    [[TMP11:%.*]] = add i64 [[TMP3]], 1
+; I32-NEXT:    [[TMP12:%.*]] = add i64 [[TMP4]], 1
+; I32-NEXT:    [[TMP13:%.*]] = add i64 [[TMP5]], 1
+; I32-NEXT:    [[TMP14:%.*]] = add i64 [[TMP6]], 1
+; I32-NEXT:    [[TMP15:%.*]] = add i64 [[TMP7]], 1
+; I32-NEXT:    [[TMP16:%.*]] = add i64 [[TMP8]], 1
+; I32-NEXT:    [[TMP17:%.*]] = add i64 [[TMP9]], 1
+; I32-NEXT:    [[TMP18:%.*]] = add i64 [[TMP10]], 1
+; I32-NEXT:    [[TMP19:%.*]] = insertelement <8 x i64> poison, i64 [[TMP11]], i32 0
+; I32-NEXT:    [[TMP20:%.*]] = insertelement <8 x i64> [[TMP19]], i64 [[TMP12]], i32 1
+; I32-NEXT:    [[TMP21:%.*]] = insertelement <8 x i64> [[TMP20]], i64 [[TMP13]], i32 2
+; I32-NEXT:    [[TMP22:%.*]] = insertelement <8 x i64> [[TMP21]], i64 [[TMP14]], i32 3
+; I32-NEXT:    [[TMP23:%.*]] = insertelement <8 x i64> [[TMP22]], i64 [[TMP15]], i32 4
+; I32-NEXT:    [[TMP24:%.*]] = insertelement <8 x i64> [[TMP23]], i64 [[TMP16]], i32 5
+; I32-NEXT:    [[TMP25:%.*]] = insertelement <8 x i64> [[TMP24]], i64 [[TMP17]], i32 6
+; I32-NEXT:    [[TMP26:%.*]] = insertelement <8 x i64> [[TMP25]], i64 [[TMP18]], i32 7
+; I32-NEXT:    [[TMP27:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP11]]
+; I32-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP12]]
+; I32-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]]
+; I32-NEXT:    [[TMP30:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP14]]
+; I32-NEXT:    [[TMP31:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP15]]
+; I32-NEXT:    [[TMP32:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP16]]
+; I32-NEXT:    [[TMP33:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP17]]
+; I32-NEXT:    [[TMP34:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP18]]
+; I32-NEXT:    [[TMP35:%.*]] = load float, ptr [[TMP27]], align 4
+; I32-NEXT:    [[TMP36:%.*]] = load float, ptr [[TMP28]], align 4
+; I32-NEXT:    [[TMP37:%.*]] = load float, ptr [[TMP29]], align 4
+; I32-NEXT:    [[TMP38:%.*]] = load float, ptr [[TMP30]], align 4
+; I32-NEXT:    [[TMP39:%.*]] = load float, ptr [[TMP31]], align 4
+; I32-NEXT:    [[TMP40:%.*]] = load float, ptr [[TMP32]], align 4
+; I32-NEXT:    [[TMP41:%.*]] = load float, ptr [[TMP33]], align 4
+; I32-NEXT:    [[TMP42:%.*]] = load float, ptr [[TMP34]], align 4
+; I32-NEXT:    [[TMP43:%.*]] = insertelement <8 x float> poison, float [[TMP35]], i32 0
+; I32-NEXT:    [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP36]], i32 1
+; I32-NEXT:    [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[TMP37]], i32 2
+; I32-NEXT:    [[TMP46:%.*]] = insertelement <8 x float> [[TMP45]], float [[TMP38]], i32 3
+; I32-NEXT:    [[TMP47:%.*]] = insertelement <8 x float> [[TMP46]], float [[TMP39]], i32 4
+; I32-NEXT:    [[TMP48:%.*]] = insertelement <8 x float> [[TMP47]], float [[TMP40]], i32 5
+; I32-NEXT:    [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[TMP41]], i32 6
+; I32-NEXT:    [[TMP50:%.*]] = insertelement <8 x float> [[TMP49]], float [[TMP42]], i32 7
+; I32-NEXT:    [[TMP51:%.*]] = fcmp oeq <8 x float> [[TMP50]], zeroinitializer
+; I32-NEXT:    [[TMP52:%.*]] = mul <8 x i64> [[TMP26]], [[BROADCAST_SPLAT]]
+; I32-NEXT:    [[TMP53:%.*]] = extractelement <8 x i64> [[TMP52]], i32 0
+; I32-NEXT:    [[TMP54:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP53]]
+; I32-NEXT:    [[TMP55:%.*]] = extractelement <8 x i64> [[TMP52]], i32 1
+; I32-NEXT:    [[TMP56:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP55]]
+; I32-NEXT:    [[TMP57:%.*]] = extractelement <8 x i64> [[TMP52]], i32 2
+; I32-NEXT:    [[TMP58:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP57]]
+; I32-NEXT:    [[TMP59:%.*]] = extractelement <8 x i64> [[TMP52]], i32 3
+; I32-NEXT:    [[TMP60:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP59]]
+; I32-NEXT:    [[TMP61:%.*]] = extractelement <8 x i64> [[TMP52]], i32 4
+; I32-NEXT:    [[TMP62:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP61]]
+; I32-NEXT:    [[TMP63:%.*]] = extractelement <8 x i64> [[TMP52]], i32 5
+; I32-NEXT:    [[TMP64:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP63]]
+; I32-NEXT:    [[TMP65:%.*]] = extractelement <8 x i64> [[TMP52]], i32 6
+; I32-NEXT:    [[TMP66:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP65]]
+; I32-NEXT:    [[TMP67:%.*]] = extractelement <8 x i64> [[TMP52]], i32 7
+; I32-NEXT:    [[TMP68:%.*]] = getelementptr i8, ptr [[SRC_2]], i64 [[TMP67]]
+; I32-NEXT:    [[TMP69:%.*]] = insertelement <8 x ptr> poison, ptr [[TMP54]], i32 0
+; I32-NEXT:    [[TMP70:%.*]] = insertelement <8 x ptr> [[TMP69]], ptr [[TMP56]], i32 1
+; I32-NEXT:    [[TMP71:%.*]] = insertelement <8 x ptr> [[TMP70]], ptr [[TMP58]], i32 2
+; I32-NEXT:    [[TMP72:%.*]] = insertelement <8 x ptr> [[TMP71]], ptr [[TMP60]], i32 3
+; I32-NEXT:    [[TMP73:%.*]] = insertelement <8 x ptr> [[TMP72]], ptr [[TMP62]], i32 4
+; I32-NEXT:    [[TMP74:%.*]] = insertelement <8 x ptr> [[TMP73]], ptr [[TMP64]], i32 5
+; I32-NEXT:    [[TMP75:%.*]] = insertelement <8 x ptr> [[TMP74]], ptr [[TMP66]], i32 6
+; I32-NEXT:    [[TMP76:%.*]] = insertelement <8 x ptr> [[TMP75]], ptr [[TMP68]], i32 7
+; I32-NEXT:    [[PREDPHI:%.*]] = select <8 x i1> [[TMP51]], <8 x ptr> [[TMP76]], <8 x ptr> [[BROADCAST_SPLAT2]]
+; I32-NEXT:    [[TMP77:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 0
+; I32-NEXT:    [[TMP78:%.*]] = load float, ptr [[TMP77]], align 4
+; I32-NEXT:    [[TMP79:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 1
+; I32-NEXT:    [[TMP80:%.*]] = load float, ptr [[TMP79]], align 4
+; I32-NEXT:    [[TMP81:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 2
+; I32-NEXT:    [[TMP82:%.*]] = load float, ptr [[TMP81]], align 4
+; I32-NEXT:    [[TMP83:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 3
+; I32-NEXT:    [[TMP84:%.*]] = load float, ptr [[TMP83]], align 4
+; I32-NEXT:    [[TMP85:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 4
+; I32-NEXT:    [[TMP86:%.*]] = load float, ptr [[TMP85]], align 4
+; I32-NEXT:    [[TMP87:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 5
+; I32-NEXT:    [[TMP88:%.*]] = load float, ptr [[TMP87]], align 4
+; I32-NEXT:    [[TMP89:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 6
+; I32-NEXT:    [[TMP90:%.*]] = load float, ptr [[TMP89]], align 4
+; I32-NEXT:    [[TMP91:%.*]] = extractelement <8 x ptr> [[PREDPHI]], i32 7
+; I32-NEXT:    [[TMP92:%.*]] = load float, ptr [[TMP91]], align 4
+; I32-NEXT:    [[TMP93:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
+; I32-NEXT:    [[TMP94:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]]
+; I32-NEXT:    [[TMP95:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]]
+; I32-NEXT:    [[TMP96:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]]
+; I32-NEXT:    [[TMP97:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
+; I32-NEXT:    [[TMP98:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
+; I32-NEXT:    [[TMP99:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP9]]
+; I32-NEXT:    [[TMP100:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]]
+; I32-NEXT:    store float [[TMP78]], ptr [[TMP93]], align 4
+; I32-NEXT:    store float [[TMP80]], ptr [[TMP94]], align 4
+; I32-NEXT:    store float [[TMP82]], ptr [[TMP95]], align 4
+; I32-NEXT:    store float [[TMP84]], ptr [[TMP96]], align 4
+; I32-NEXT:    store float [[TMP86]], ptr [[TMP97]], align 4
+; I32-NEXT:    store float [[TMP88]], ptr [[TMP98]], align 4
+; I32-NEXT:    store float [[TMP90]], ptr [[TMP99]], align 4
+; I32-NEXT:    store float [[TMP92]], ptr [[TMP100]], align 4
+; I32-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; I32-NEXT:    [[TMP101:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; I32-NEXT:    br i1 [[TMP101]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; I32:       [[MIDDLE_BLOCK]]:
+; I32-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
+; I32-NEXT:    br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
+; I32:       [[SCALAR_PH]]:
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %iv.2 = phi i64 [ %start, %entry ], [ %iv.2.next, %loop.latch ]
+  %iv.1 = add i64 %iv, 1
+  %gep.src = getelementptr i8, ptr %src, i64 %iv.1
+  %l.src = load float, ptr %gep.src, align 4
+  %c = fcmp oeq float %l.src, 0.000000e+00
+  br i1 %c, label %then, label %loop.latch
+
+then:
+  %iv.mul = mul i64 %iv.1, %start
+  %gep.src.2 = getelementptr i8, ptr %src.2, i64 %iv.mul
+  br label %loop.latch
+
+loop.latch:
+  %merge.gep = phi ptr [ %gep.src.2, %then ], [ %src.2, %loop.header ]
+  %l.2 = load float, ptr %merge.gep, align 4
+  %gep.dst = getelementptr i8, ptr %dst, i64 %iv
+  store float %l.2, ptr %gep.dst, align 4
+  %iv.next = add i64 %iv, 1
+  %iv.2.next = add i64 %iv.2, -1
+  %ec = icmp sgt i64 %iv.2, 100
+  br i1 %ec, label %loop.header, label %exit
+
+exit:
+  ret void
+}
+
+; Attribute group referenced as #0 by the test function(s) above.
 attributes #0 = { "target-cpu"="znver2" }
 
 !0 = distinct !{!0, !1}
diff --git a/llvm/test/Transforms/NewGVN/ptrtoaddr.ll b/llvm/test/Transforms/NewGVN/ptrtoaddr.ll
new file mode 100644
index 0000000..e51b42a
--- /dev/null
+++ b/llvm/test/Transforms/NewGVN/ptrtoaddr.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes=newgvn < %s | FileCheck %s
+
+define i64 @ptrtoaddr_same(ptr %p) {
+; CHECK-LABEL: define i64 @ptrtoaddr_same(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    ret i64 0
+;
+  %i = ptrtoaddr ptr %p to i64
+  %j = ptrtoaddr ptr %p to i64
+  %sub = sub i64 %i, %j
+  ret i64 %sub
+}
+
+; Unlike ptrtoint, ptrtoaddr cannot yield different result types for the same
+; input, so only casts of two different pointers are checked here.
+define i64 @ptrtoaddr_different(ptr %p, ptr %p2) {
+; CHECK-LABEL: define i64 @ptrtoaddr_different(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT:    [[I:%.*]] = ptrtoaddr ptr [[P]] to i64
+; CHECK-NEXT:    [[J:%.*]] = ptrtoaddr ptr [[P2]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[I]], [[J]]
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %i = ptrtoaddr ptr %p to i64
+  %j = ptrtoaddr ptr %p2 to i64
+  %sub = sub i64 %i, %j
+  ret i64 %sub
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll b/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll
index caf7a80..7c9888f 100644
--- a/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll
+++ b/llvm/test/Transforms/PhaseOrdering/switch-to-arithmetic-inlining.ll
@@ -436,10 +436,11 @@ bb104:                                            ; preds = %bb102
   br label %bb105
 }
 
+; Make sure the call to @test is inlined into @test2.
 define i8 @test2(i8 %x) {
 ; CHECK-LABEL: define range(i8 0, 53) i8 @test2(
 ; CHECK-SAME: i8 [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
-; CHECK-NEXT:    [[CALL:%.*]] = tail call i8 @test(i8 [[X]])
+; CHECK-NEXT:    [[CALL:%.*]] = tail call range(i8 0, 53) i8 @llvm.umin.i8(i8 [[X]], i8 52)
 ; CHECK-NEXT:    ret i8 [[CALL]]
 ;
   %call = call i8 @test(i8 %x)
diff --git a/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll b/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll
index 9bbe3eb..42d3dcc 100644
--- a/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll
+++ b/llvm/test/Transforms/SimplifyCFG/merge-calls-alloc-token.ll
@@ -97,8 +97,8 @@ if.end:
   ret ptr %x.0
 }
 
-!0 = !{!"int"}
-!1 = !{!"char[4]"}
+!0 = !{!"int", i1 0}
+!1 = !{!"char[4]", i1 0}
 ;.
-; CHECK: [[META0]] = !{!"int"}
+; CHECK: [[META0]] = !{!"int", i1 false}
 ;.
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll b/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll
index c9063d3..25267dc 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes='simplifycfg' < %s | FileCheck %s --check-prefix=OPTNOLUT
+; RUN: opt -S -passes='simplifycfg<switch-to-arithmetic>' < %s | FileCheck %s --check-prefix=OPTNOLUT
 ; RUN: %if amdgpu-registered-target %{ opt -mtriple=amdgcn--amdpal -S -passes='simplifycfg<switch-to-lookup>' < %s | FileCheck %s --check-prefix=TTINOLUT %}
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -7,23 +7,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 define i32 @linear_transform_with_default(i32 %x) {
 ; OPTNOLUT-LABEL: define i32 @linear_transform_with_default(
 ; OPTNOLUT-SAME: i32 [[X:%.*]]) {
-; OPTNOLUT-NEXT:  [[ENTRY:.*]]:
-; OPTNOLUT-NEXT:    switch i32 [[X]], label %[[END:.*]] [
-; OPTNOLUT-NEXT:      i32 0, label %[[CASE0:.*]]
-; OPTNOLUT-NEXT:      i32 1, label %[[CASE1:.*]]
-; OPTNOLUT-NEXT:      i32 2, label %[[CASE2:.*]]
-; OPTNOLUT-NEXT:      i32 3, label %[[CASE3:.*]]
-; OPTNOLUT-NEXT:    ]
-; OPTNOLUT:       [[CASE0]]:
-; OPTNOLUT-NEXT:    br label %[[END]]
-; OPTNOLUT:       [[CASE1]]:
-; OPTNOLUT-NEXT:    br label %[[END]]
-; OPTNOLUT:       [[CASE2]]:
-; OPTNOLUT-NEXT:    br label %[[END]]
-; OPTNOLUT:       [[CASE3]]:
-; OPTNOLUT-NEXT:    br label %[[END]]
-; OPTNOLUT:       [[END]]:
-; OPTNOLUT-NEXT:    [[IDX:%.*]] = phi i32 [ 1, %[[CASE0]] ], [ 4, %[[CASE1]] ], [ 7, %[[CASE2]] ], [ 10, %[[CASE3]] ], [ 13, %[[ENTRY]] ]
+; OPTNOLUT-NEXT:  [[ENTRY:.*:]]
+; OPTNOLUT-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X]], 4
+; OPTNOLUT-NEXT:    [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[X]], 3
+; OPTNOLUT-NEXT:    [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 1
+; OPTNOLUT-NEXT:    [[IDX:%.*]] = select i1 [[TMP0]], i32 [[SWITCH_OFFSET]], i32 13
 ; OPTNOLUT-NEXT:    ret i32 [[IDX]]
 ;
 ; TTINOLUT-LABEL: define i32 @linear_transform_with_default(
@@ -138,26 +126,8 @@ end:
 define i32 @linear_transform_no_default(i32 %x) {
 ; OPTNOLUT-LABEL: define i32 @linear_transform_no_default(
 ; OPTNOLUT-SAME: i32 [[X:%.*]]) {
-; OPTNOLUT-NEXT:  [[ENTRY:.*]]:
-; OPTNOLUT-NEXT:    switch i32 [[X]], label %[[DEFAULT:.*]] [
-; OPTNOLUT-NEXT:      i32 0, label %[[END:.*]]
-; OPTNOLUT-NEXT:      i32 1, label %[[CASE1:.*]]
-; OPTNOLUT-NEXT:      i32 2, label %[[CASE2:.*]]
-; OPTNOLUT-NEXT:      i32 3, label %[[CASE3:.*]]
-; OPTNOLUT-NEXT:      i32 4, label %[[CASE4:.*]]
-; OPTNOLUT-NEXT:    ]
-; OPTNOLUT:       [[CASE1]]:
-; OPTNOLUT-NEXT:    br label %[[END]]
-; OPTNOLUT:       [[CASE2]]:
-; OPTNOLUT-NEXT:    br label %[[END]]
-; OPTNOLUT:       [[CASE3]]:
-; OPTNOLUT-NEXT:    br label %[[END]]
-; OPTNOLUT:       [[CASE4]]:
-; OPTNOLUT-NEXT:    br label %[[END]]
-; OPTNOLUT:       [[DEFAULT]]:
-; OPTNOLUT-NEXT:    unreachable
-; OPTNOLUT:       [[END]]:
-; OPTNOLUT-NEXT:    [[SWITCH_IDX_MULT:%.*]] = phi i32 [ 3, %[[CASE1]] ], [ 6, %[[CASE2]] ], [ 9, %[[CASE3]] ], [ 12, %[[CASE4]] ], [ 0, %[[ENTRY]] ]
+; OPTNOLUT-NEXT:  [[ENTRY:.*:]]
+; OPTNOLUT-NEXT:    [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[X]], 3
 ; OPTNOLUT-NEXT:    ret i32 [[SWITCH_IDX_MULT]]
 ;
 ; TTINOLUT-LABEL: define i32 @linear_transform_no_default(
@@ -350,18 +320,9 @@ end:
 define i32 @single_value_withdefault(i32 %x) {
 ; OPTNOLUT-LABEL: define i32 @single_value_withdefault(
 ; OPTNOLUT-SAME: i32 [[X:%.*]]) {
-; OPTNOLUT-NEXT:  [[ENTRY:.*]]:
-; OPTNOLUT-NEXT:    switch i32 [[X]], label %[[DEFAULT:.*]] [
-; OPTNOLUT-NEXT:      i32 0, label %[[END:.*]]
-; OPTNOLUT-NEXT:      i32 1, label %[[END]]
-; OPTNOLUT-NEXT:      i32 2, label %[[END]]
-; OPTNOLUT-NEXT:      i32 3, label %[[END]]
-; OPTNOLUT-NEXT:      i32 4, label %[[END]]
-; OPTNOLUT-NEXT:    ]
-; OPTNOLUT:       [[DEFAULT]]:
-; OPTNOLUT-NEXT:    br label %[[END]]
-; OPTNOLUT:       [[END]]:
-; OPTNOLUT-NEXT:    [[DOT:%.*]] = phi i32 [ 3, %[[DEFAULT]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ]
+; OPTNOLUT-NEXT:  [[ENTRY:.*:]]
+; OPTNOLUT-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X]], 5
+; OPTNOLUT-NEXT:    [[DOT:%.*]] = select i1 [[TMP0]], i32 2, i32 3
 ; OPTNOLUT-NEXT:    ret i32 [[DOT]]
 ;
 ; TTINOLUT-LABEL: define i32 @single_value_withdefault(
@@ -401,18 +362,9 @@ end:
 define i32 @single_value_no_jump_tables(i32 %x) "no-jump-tables"="true" {
 ; OPTNOLUT-LABEL: define i32 @single_value_no_jump_tables(
 ; OPTNOLUT-SAME: i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
-; OPTNOLUT-NEXT:  [[ENTRY:.*]]:
-; OPTNOLUT-NEXT:    switch i32 [[X]], label %[[DEFAULT:.*]] [
-; OPTNOLUT-NEXT:      i32 0, label %[[END:.*]]
-; OPTNOLUT-NEXT:      i32 1, label %[[END]]
-; OPTNOLUT-NEXT:      i32 2, label %[[END]]
-; OPTNOLUT-NEXT:      i32 3, label %[[END]]
-; OPTNOLUT-NEXT:      i32 4, label %[[END]]
-; OPTNOLUT-NEXT:    ]
-; OPTNOLUT:       [[DEFAULT]]:
-; OPTNOLUT-NEXT:    br label %[[END]]
-; OPTNOLUT:       [[END]]:
-; OPTNOLUT-NEXT:    [[IDX:%.*]] = phi i32 [ 3, %[[DEFAULT]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ]
+; OPTNOLUT-NEXT:  [[ENTRY:.*:]]
+; OPTNOLUT-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X]], 5
+; OPTNOLUT-NEXT:    [[IDX:%.*]] = select i1 [[TMP0]], i32 2, i32 3
 ; OPTNOLUT-NEXT:    ret i32 [[IDX]]
 ;
 ; TTINOLUT-LABEL: define i32 @single_value_no_jump_tables(
@@ -449,6 +401,60 @@ end:
   ret i32 %idx
 }
 
+define i1 @single_value_with_mask(i32 %x) {
+; OPTNOLUT-LABEL: define i1 @single_value_with_mask(
+; OPTNOLUT-SAME: i32 [[X:%.*]]) {
+; OPTNOLUT-NEXT:  [[ENTRY:.*]]:
+; OPTNOLUT-NEXT:    switch i32 [[X]], label %[[DEFAULT:.*]] [
+; OPTNOLUT-NEXT:      i32 18, label %[[END:.*]]
+; OPTNOLUT-NEXT:      i32 21, label %[[END]]
+; OPTNOLUT-NEXT:      i32 48, label %[[END]]
+; OPTNOLUT-NEXT:      i32 16, label %[[END]]
+; OPTNOLUT-NEXT:    ]
+; OPTNOLUT:       [[DEFAULT]]:
+; OPTNOLUT-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X]], 80
+; OPTNOLUT-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true
+; OPTNOLUT-NEXT:    br label %[[END]]
+; OPTNOLUT:       [[END]]:
+; OPTNOLUT-NEXT:    [[RES:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ [[SEL]], %[[DEFAULT]] ]
+; OPTNOLUT-NEXT:    ret i1 [[RES]]
+;
+; TTINOLUT-LABEL: define i1 @single_value_with_mask(
+; TTINOLUT-SAME: i32 [[X:%.*]]) {
+; TTINOLUT-NEXT:  [[ENTRY:.*]]:
+; TTINOLUT-NEXT:    [[SWITCH_TABLEIDX:%.*]] = sub i32 [[X]], 16
+; TTINOLUT-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 33
+; TTINOLUT-NEXT:    [[SWITCH_MASKINDEX:%.*]] = zext i32 [[SWITCH_TABLEIDX]] to i64
+; TTINOLUT-NEXT:    [[SWITCH_SHIFTED:%.*]] = lshr i64 4294967333, [[SWITCH_MASKINDEX]]
+; TTINOLUT-NEXT:    [[SWITCH_LOBIT:%.*]] = trunc i64 [[SWITCH_SHIFTED]] to i1
+; TTINOLUT-NEXT:    [[OR_COND:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_LOBIT]], i1 false
+; TTINOLUT-NEXT:    br i1 [[OR_COND]], label %[[END:.*]], label %[[DEFAULT:.*]]
+; TTINOLUT:       [[DEFAULT]]:
+; TTINOLUT-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X]], 80
+; TTINOLUT-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true
+; TTINOLUT-NEXT:    br label %[[END]]
+; TTINOLUT:       [[END]]:
+; TTINOLUT-NEXT:    [[RES:%.*]] = phi i1 [ [[SEL]], %[[DEFAULT]] ], [ false, %[[ENTRY]] ]
+; TTINOLUT-NEXT:    ret i1 [[RES]]
+;
+entry:
+  switch i32 %x, label %default [
+  i32 18, label %end
+  i32 21, label %end
+  i32 48, label %end
+  i32 16, label %end
+  ]
+
+default:
+  %cmp = icmp eq i32 %x, 80
+  %sel = select i1 %cmp, i1 false, i1 true
+  br label %end
+
+end:
+  %res = phi i1 [ false, %entry ], [ false, %entry ], [ false, %entry ], [ false, %entry ], [ %sel, %default ]
+  ret i1 %res
+}
+
 define i32 @lookup_table(i32 %x) {
 ; OPTNOLUT-LABEL: define i32 @lookup_table(
 ; OPTNOLUT-SAME: i32 [[X:%.*]]) {
diff --git a/llvm/test/tools/llvm-profgen/Inputs/coff-profile.exe b/llvm/test/tools/llvm-profgen/Inputs/coff-profile.exe
index 309476a..a4c36a3 100644
--- a/llvm/test/tools/llvm-profgen/Inputs/coff-profile.exe
+++ b/llvm/test/tools/llvm-profgen/Inputs/coff-profile.exe
diff --git a/llvm/test/tools/llvm-profgen/Inputs/coff-profile.perfscript b/llvm/test/tools/llvm-profgen/Inputs/coff-profile.perfscript
index ec5c8ff..29a8803 100644
--- a/llvm/test/tools/llvm-profgen/Inputs/coff-profile.perfscript
+++ b/llvm/test/tools/llvm-profgen/Inputs/coff-profile.perfscript
@@ -1,13 +1,13 @@
 PERF_RECORD_MMAP2 5752/0: [0x7ff70a1b0000(0x640000) @ 0x1000 00:00 0 0]: r-xp c:\Users\haohaiwe\Desktop\coff-profile.exe
- 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0
- 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0
- 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0
- 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0
- 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0
- 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0
- 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0
- 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0
- 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0
- 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/P/X/A/0 0x7ff70a1b1415/0x7ff70a1b13b0/M/X/A/0
- 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/-/X/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0
- 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0 0x7ff70a1b1482/0x7ff70a1b1430/P/-/A/0
+ 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0
+ 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0
+ 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0
+ 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0
+ 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0
+ 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0
+ 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0
+ 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0
+ 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0
+ 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/P/X/A/0 0x7ff70a1b1400/0x7ff70a1b13a0/M/X/A/0
+ 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/-/X/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0
+ 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0 0x7ff70a1b1461/0x7ff70a1b1410/P/-/A/0
diff --git a/llvm/test/tools/llvm-profgen/coff-profile.test b/llvm/test/tools/llvm-profgen/coff-profile.test
index 5578f73..6411642 100644
--- a/llvm/test/tools/llvm-profgen/coff-profile.test
+++ b/llvm/test/tools/llvm-profgen/coff-profile.test
@@ -1,37 +1,77 @@
+; RUN: llvm-profgen --format=text --use-dwarf-correlation --perfscript=%S/Inputs/coff-profile.perfscript --binary=%S/Inputs/coff-profile.exe --output=%t
+; RUN: FileCheck %s --input-file %t --check-prefix=DWARF
 ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/coff-profile.perfscript --binary=%S/Inputs/coff-profile.exe --output=%t
-; RUN: FileCheck %s --input-file %t
+; RUN: FileCheck %s --input-file %t --check-prefix=PROBE
 
-CHECK:      main:31837:0
-CHECK-NEXT:  0: 0
-CHECK-NEXT:  3.1: 0
-CHECK-NEXT:  3.2: 0
-CHECK-NEXT:  8: 0
-CHECK-NEXT:  65501: 0
-CHECK-NEXT:  1: ??$init@HG@MyNameSpace2@@YAXHPEAG@Z:0
-CHECK-NEXT:   1: 0
-CHECK-NEXT:   1.1: 0
-CHECK-NEXT:   1.2: 0
-CHECK-NEXT:   2: 0
-CHECK-NEXT:   65514: 0
-CHECK-NEXT:  4: ?work1@?$MyClass@GH@MyNameSpace1@@QEAAXQEAGH@Z:3193
-CHECK-NEXT:   0: ?work@?$MyClass@GH@MyNameSpace1@@AEAAXQEAGHH@Z:3193
-CHECK-NEXT:    1.1: 31
-CHECK-NEXT:    1.2: 31
-CHECK-NEXT:    2: 31
-CHECK-NEXT:    3: 31
-CHECK-NEXT:    65530: 0
-CHECK-NEXT:  5: ?work2@?$MyClass@GH@MyNameSpace1@@QEAAXQEAGH@Z:28644
-CHECK-NEXT:   0: ?work@?$MyClass@GH@MyNameSpace1@@AEAAXQEAGHH@Z:28644
-CHECK-NEXT:    1.1: 341
-CHECK-NEXT:    1.2: 341
-CHECK-NEXT:    2: 341
-CHECK-NEXT:    3: 341
-CHECK-NEXT:    65530: 0
-CHECK-NEXT:  7: ?print@MyNameSpace2@@YAXPEAGH@Z:0
-CHECK-NEXT:   1: 0
+DWARF:      main:31341:0
+DWARF-NEXT:  0: 0
+DWARF-NEXT:  3: 0
+DWARF-NEXT:  3.1: 0
+DWARF-NEXT:  3.2: 0
+DWARF-NEXT:  8: 0
+DWARF-NEXT:  65501: 0
+DWARF-NEXT:  1: ??$init@HG@MyNameSpace2@@YAXHPEAG@Z:0
+DWARF-NEXT:   1: 0
+DWARF-NEXT:   1.1: 0
+DWARF-NEXT:   1.2: 0
+DWARF-NEXT:   2: 0
+DWARF-NEXT:   65514: 0
+DWARF-NEXT:  4: ?work1@?$MyClass@GH@MyNameSpace1@@QEAAXQEAGH@Z:3038
+DWARF-NEXT:   0: ?work@?$MyClass@GH@MyNameSpace1@@AEAAXQEAGHH@Z:3038
+DWARF-NEXT:    1.1: 31
+DWARF-NEXT:    1.2: 31
+DWARF-NEXT:    2: 31
+DWARF-NEXT:    3: 31
+DWARF-NEXT:  5: ?work2@?$MyClass@GH@MyNameSpace1@@QEAAXQEAGH@Z:28303
+DWARF-NEXT:   0: ?work@?$MyClass@GH@MyNameSpace1@@AEAAXQEAGHH@Z:28303
+DWARF-NEXT:    1.1: 341
+DWARF-NEXT:    1.2: 341
+DWARF-NEXT:    2: 341
+DWARF-NEXT:    3: 341
+DWARF-NEXT:  7: ?print@MyNameSpace2@@YAXPEAGH@Z:0
+DWARF-NEXT:   1: 0
+
+PROBE:       main:1116:0
+PROBE-NEXT:   1: 0
+PROBE-NEXT:   3: 0
+PROBE-NEXT:   4: 0
+PROBE-NEXT:   5: 0
+PROBE-NEXT:   8: 0
+PROBE-NEXT:   9: 0
+PROBE-NEXT:   2: ??$init@HG@MyNameSpace2@@YAXHPEAG@Z:0
+PROBE-NEXT:    1: 0
+PROBE-NEXT:    2: 0
+PROBE-NEXT:    3: 0
+PROBE-NEXT:    4: 0
+PROBE-NEXT:    5: 0
+PROBE-NEXT:    6: 0
+PROBE-NEXT:    !CFGChecksum: 107105011060
+PROBE-NEXT:   6: ?work1@?$MyClass@GH@MyNameSpace1@@QEAAXQEAGH@Z:93
+PROBE-NEXT:    1: 0
+PROBE-NEXT:    2: ?work@?$MyClass@GH@MyNameSpace1@@AEAAXQEAGHH@Z:93
+PROBE-NEXT:     1: 0
+PROBE-NEXT:     2: 31
+PROBE-NEXT:     4: 31
+PROBE-NEXT:     5: 31
+PROBE-NEXT:     !CFGChecksum: 107105011060
+PROBE-NEXT:    !CFGChecksum: 281479271677951
+PROBE-NEXT:   7: ?work2@?$MyClass@GH@MyNameSpace1@@QEAAXQEAGH@Z:1023
+PROBE-NEXT:    2: ?work@?$MyClass@GH@MyNameSpace1@@AEAAXQEAGHH@Z:1023
+PROBE-NEXT:     2: 341
+PROBE-NEXT:     3: 0
+PROBE-NEXT:     4: 341
+PROBE-NEXT:     5: 341
+PROBE-NEXT:     6: 0
+PROBE-NEXT:     !CFGChecksum: 107105011060
+PROBE-NEXT:    !CFGChecksum: 281479271677951
+PROBE-NEXT:   10: ?print@MyNameSpace2@@YAXPEAGH@Z:0
+PROBE-NEXT:    1: 0
+PROBE-NEXT:    2: 0
+PROBE-NEXT:    !CFGChecksum: 281479271677951
+PROBE-NEXT:   !CFGChecksum: 1126005794311845
 
 ; Original code
-; clang-cl.exe -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld -Xclang -fdebug-info-for-profiling -link -debug:dwarf
+; clang-cl.exe -O2 -gdwarf -gline-tables-only -fpseudo-probe-for-profiling coff-profile.cpp -fuse-ld=lld -Xclang -fdebug-info-for-profiling -link -debug:dwarf
 
 #include <stdio.h>