201 files changed, 6944 insertions, 455 deletions
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll b/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll
index 6eed0ec..4c2a9c3 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll
@@ -54,6 +54,100 @@ exit:
   ret void
 }
 
+; Same as @test_inbounds_gep_used_in_predicated_block, but also storing the
+; pointer values in the header.
+define void @test_inbounds_gep_used_in_predicated_block_stored_value_operand(ptr %A, i64 %n, ptr noalias %B) {
+; CHECK-LABEL: 'test_inbounds_gep_used_in_predicated_block_stored_value_operand'
+; CHECK-NEXT:    loop.header:
+; CHECK-NEXT:      Memory dependences are safe
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop.latch ]
+  %offset.0 = phi i64 [ 0, %entry ], [ %offset.0.next, %loop.latch ]
+  %offset.1 = phi i64 [ 0, %entry ], [ %offset.1.next, %loop.latch ]
+  %idx.0 = getelementptr inbounds i8, ptr %A, i64 %offset.0
+  %idx.1 = getelementptr inbounds i8, ptr %A, i64 %offset.1
+  %mask = and i64 %i, 3
+  %cond = icmp eq i64 %mask, 0
+  store ptr %idx.0, ptr %B
+  store ptr %idx.1, ptr %B
+  br i1 %cond, label %if.then, label %loop.latch
+
+if.then:
+  store i8 2, ptr %idx.0
+  store i8 1, ptr %idx.1
+  br label %loop.latch
+
+loop.latch:
+  %i.next = add nuw nsw i64 %i, 1
+  %offset.0.next = add i64 %offset.0, 4611686018427387905 ; 2^62 + 1
+  %offset.1.next = add i64 %offset.1, 4611686018427387906 ; 2^62 + 2
+  %cond.exit = icmp eq i64 %i.next, 100
+  br i1 %cond.exit, label %exit, label %loop.header
+
+exit:
+  ret void
+}
+
+; Same as @test_inbounds_gep_used_in_predicated_block_non_memop_user, but with
+; extra GEP users in the header.
+define void @test_inbounds_gep_used_in_predicated_block_non_memop_user(ptr %A, i64 %n) {
+; CHECK-LABEL: 'test_inbounds_gep_used_in_predicated_block_non_memop_user'
+; CHECK-NEXT:    loop.header:
+; CHECK-NEXT:      Memory dependences are safe
+; CHECK-NEXT:      Dependences:
+; CHECK-NEXT:      Run-time memory checks:
+; CHECK-NEXT:      Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT:      SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT:      Expressions re-written:
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop.latch ]
+  %offset.0 = phi i64 [ 0, %entry ], [ %offset.0.next, %loop.latch ]
+  %offset.1 = phi i64 [ 0, %entry ], [ %offset.1.next, %loop.latch ]
+  %idx.0 = getelementptr inbounds i8, ptr %A, i64 %offset.0
+  %idx.1 = getelementptr inbounds i8, ptr %A, i64 %offset.1
+  %mask = and i64 %i, 3
+  %cond = icmp eq i64 %mask, 0
+  %gep.idx.0 = getelementptr inbounds i8, ptr %idx.0, i8 1
+  %gep.idx.1 = getelementptr inbounds i8, ptr %idx.1, i8 1
+  br i1 %cond, label %if.then, label %loop.latch
+
+if.then:
+  store i8 2, ptr %idx.0
+  store i8 1, ptr %idx.1
+  br label %loop.latch
+
+loop.latch:
+  %i.next = add nuw nsw i64 %i, 1
+  %offset.0.next = add i64 %offset.0, 4611686018427387905 ; 2^62 + 1
+  %offset.1.next = add i64 %offset.1, 4611686018427387906 ; 2^62 + 2
+  %cond.exit = icmp eq i64 %i.next, 100
+  br i1 %cond.exit, label %exit, label %loop.header
+
+exit:
+  store i32 0, ptr %gep.idx.0
+  store i32 0, ptr %gep.idx.1
+  ret void
+}
+
 define void @test_header_existing(ptr %src, ptr %dst, i64 %start) {
 ; CHECK-LABEL: 'test_header_existing'
 ; CHECK-NEXT:    loop.header:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
index 076cbf7..a505b42 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -1408,6 +1408,88 @@ define <4 x i16> @ext_via_i19(<4 x i16> %a) {
   ret <4 x i16> %t6
 }
 
+define <8 x i8> @srhadd_v8i8_trunc(<8 x i8> %s0, <8 x i8> %s1) {
+; CHECK-LABEL: srhadd_v8i8_trunc:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srhadd.8b v0, v0, v1
+; CHECK-NEXT:    ret
+  %s0s = sext <8 x i8> %s0 to <8 x i16>
+  %s1s = sext <8 x i8> %s1 to <8 x i16>
+  %s = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %s0s, <8 x i16> %s1s)
+  %s2 = trunc <8 x i16> %s to <8 x i8>
+  ret <8 x i8> %s2
+}
+
+define <4 x i16> @srhadd_v4i16_trunc(<4 x i16> %s0, <4 x i16> %s1) {
+; CHECK-LABEL: srhadd_v4i16_trunc:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srhadd.4h v0, v0, v1
+; CHECK-NEXT:    ret
+  %s0s = sext <4 x i16> %s0 to <4 x i32>
+  %s1s = sext <4 x i16> %s1 to <4 x i32>
+  %s = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %s0s, <4 x i32> %s1s)
+  %s2 = trunc <4 x i32> %s to <4 x i16>
+  ret <4 x i16> %s2
+}
+
+define <2 x i32> @srhadd_v2i32_trunc(<2 x i32> %s0, <2 x i32> %s1) {
+; CHECK-LABEL: srhadd_v2i32_trunc:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll.2d v0, v0, #0
+; CHECK-NEXT:    sshll.2d v1, v1, #0
+; CHECK-NEXT:    eor.16b v2, v0, v1
+; CHECK-NEXT:    orr.16b v0, v0, v1
+; CHECK-NEXT:    ushr.2d v1, v2, #1
+; CHECK-NEXT:    sub.2d v0, v0, v1
+; CHECK-NEXT:    xtn.2s v0, v0
+; CHECK-NEXT:    ret
+  %s0s = sext <2 x i32> %s0 to <2 x i64>
+  %s1s = sext <2 x i32> %s1 to <2 x i64>
+  %s = call <2 x i64> @llvm.aarch64.neon.urhadd.v2i64(<2 x i64> %s0s, <2 x i64> %s1s)
+  %s2 = trunc <2 x i64> %s to <2 x i32>
+  ret <2 x i32> %s2
+}
+
+define <8 x i8> @urhadd_v8i8_trunc(<8 x i8> %s0, <8 x i8> %s1) {
+; CHECK-LABEL: urhadd_v8i8_trunc:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urhadd.8b v0, v0, v1
+; CHECK-NEXT:    ret
+  %s0s = zext <8 x i8> %s0 to <8 x i16>
+  %s1s = zext <8 x i8> %s1 to <8 x i16>
+  %s = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %s0s, <8 x i16> %s1s)
+  %s2 = trunc <8 x i16> %s to <8 x i8>
+  ret <8 x i8> %s2
+}
+
+define <4 x i16> @urhadd_v4i16_trunc(<4 x i16> %s0, <4 x i16> %s1) {
+; CHECK-LABEL: urhadd_v4i16_trunc:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    urhadd.4h v0, v0, v1
+; CHECK-NEXT:    ret
+  %s0s = zext <4 x i16> %s0 to <4 x i32>
+  %s1s = zext <4 x i16> %s1 to <4 x i32>
+  %s = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %s0s, <4 x i32> %s1s)
+  %s2 = trunc <4 x i32> %s to <4 x i16>
+  ret <4 x i16> %s2
+}
+
+define <2 x i32> @urhadd_v2i32_trunc(<2 x i32> %s0, <2 x i32> %s1) {
+; CHECK-LABEL: urhadd_v2i32_trunc:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    uaddl.2d v0, v0, v1
+; CHECK-NEXT:    dup.2d v1, x8
+; CHECK-NEXT:    add.2d v0, v0, v1
+; CHECK-NEXT:    shrn.2s v0, v0, #1
+; CHECK-NEXT:    ret
+  %s0s = zext <2 x i32> %s0 to <2 x i64>
+  %s1s = zext <2 x i32> %s1 to <2 x i64>
+  %s = call <2 x i64> @llvm.aarch64.neon.srhadd.v2i64(<2 x i64> %s0s, <2 x i64> %s1s)
+  %s2 = trunc <2 x i64> %s to <2 x i32>
+  ret <2 x i32> %s2
+}
+
 declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
 declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>)
 declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/sme-support-routines-calling-convention.ll b/llvm/test/CodeGen/AArch64/sme-support-routines-calling-convention.ll
index 63c6533..a5b7612 100644
--- a/llvm/test/CodeGen/AArch64/sme-support-routines-calling-convention.ll
+++ b/llvm/test/CodeGen/AArch64/sme-support-routines-calling-convention.ll
@@ -64,6 +64,6 @@ define i64 @test_sme_calling_convention_x2() nounwind {
   ret i64 %pstate.sm
 }
 
-declare void @__arm_tpidr2_save()
-declare i64 @__arm_get_current_vg()
-declare {i64, i64} @__arm_sme_state()
+declare aarch64_sme_preservemost_from_x0 void @__arm_tpidr2_save()
+declare aarch64_sme_preservemost_from_x1 i64 @__arm_get_current_vg()
+declare aarch64_sme_preservemost_from_x2 {i64, i64} @__arm_sme_state()
diff --git a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
index 2d7a91f..985bcbd 100644
--- a/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
+++ b/llvm/test/CodeGen/AMDGPU/wait-before-stores-with-scope_sys.ll
@@ -1,22 +1,50 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
-; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200 %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-GISEL %s
 
 define amdgpu_ps void @intrinsic_store_system_scope(i32 %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
-; GFX12-LABEL: intrinsic_store_system_scope:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    buffer_store_b32 v0, v[1:2], s[0:3], s4 idxen offen scope:SCOPE_SYS
-; GFX12-NEXT:    s_endpgm
+; GFX1200-LABEL: intrinsic_store_system_scope:
+; GFX1200:       ; %bb.0:
+; GFX1200-NEXT:    buffer_store_b32 v0, v[1:2], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1200-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: intrinsic_store_system_scope:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-SDAG-NEXT:    buffer_store_b32 v0, v[2:3], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: intrinsic_store_system_scope:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX1250-GISEL-NEXT:    buffer_store_b32 v0, v[4:5], s[0:3], s4 idxen offen scope:SCOPE_SYS
+; GFX1250-GISEL-NEXT:    s_endpgm
   call void @llvm.amdgcn.struct.buffer.store.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 24)
   ret void
 }
 
 define amdgpu_ps void @generic_store_volatile(i32 %val, ptr addrspace(1) %out) {
-; GFX12-LABEL: generic_store_volatile:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    global_store_b32 v[1:2], v0, off scope:SCOPE_SYS
-; GFX12-NEXT:    s_wait_storecnt 0x0
-; GFX12-NEXT:    s_endpgm
+; GFX1200-LABEL: generic_store_volatile:
+; GFX1200:       ; %bb.0:
+; GFX1200-NEXT:    global_store_b32 v[1:2], v0, off scope:SCOPE_SYS
+; GFX1200-NEXT:    s_wait_storecnt 0x0
+; GFX1200-NEXT:    s_endpgm
+;
+; GFX1250-SDAG-LABEL: generic_store_volatile:
+; GFX1250-SDAG:       ; %bb.0:
+; GFX1250-SDAG-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-SDAG-NEXT:    global_store_b32 v[2:3], v0, off scope:SCOPE_SYS
+; GFX1250-SDAG-NEXT:    s_wait_storecnt 0x0
+; GFX1250-SDAG-NEXT:    s_endpgm
+;
+; GFX1250-GISEL-LABEL: generic_store_volatile:
+; GFX1250-GISEL:       ; %bb.0:
+; GFX1250-GISEL-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX1250-GISEL-NEXT:    global_store_b32 v[4:5], v0, off scope:SCOPE_SYS
+; GFX1250-GISEL-NEXT:    s_wait_storecnt 0x0
+; GFX1250-GISEL-NEXT:    s_endpgm
   store volatile i32 %val, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll b/llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll
index b57d90c..19b9b53 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/shufflevector-reverse.ll
@@ -6,10 +6,9 @@ define void @shufflevector_reverse_v32i8(ptr %res, ptr %a) nounwind {
 ; CHECK-LABEL: shufflevector_reverse_v32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI0_0)
 ; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 78
-; CHECK-NEXT:    xvshuf.b $xr0, $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvshuf4i.w $xr0, $xr0, 27
+; CHECK-NEXT:    xvshuf4i.b $xr0, $xr0, 27
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -23,11 +22,9 @@ define void @shufflevector_reverse_v16i16(ptr %res, ptr %a) nounwind {
 ; CHECK-LABEL: shufflevector_reverse_v16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 78
-; CHECK-NEXT:    xvshuf.h $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvst $xr1, $a0, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 27
+; CHECK-NEXT:    xvshuf4i.h $xr0, $xr0, 27
+; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
   %va = load <16 x i16>, ptr %a
diff --git a/llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll b/llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll
index 29f038a..a7b59e5 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/shufflevector-reverse.ll
@@ -6,9 +6,8 @@ define void @shufflevector_reverse_v16i8(ptr %res, ptr %a) nounwind {
 ; CHECK-LABEL: shufflevector_reverse_v16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT:    vld $vr1, $a1, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT:    vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT:    vshuf4i.w $vr0, $vr0, 27
+; CHECK-NEXT:    vshuf4i.b $vr0, $vr0, 27
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -22,10 +21,9 @@ define void @shufflevector_reverse_v8i16(ptr %res, ptr %a) nounwind {
 ; CHECK-LABEL: shufflevector_reverse_v8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a1, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT:    vld $vr1, $a1, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT:    vshuf.h $vr1, $vr0, $vr0
-; CHECK-NEXT:    vst $vr1, $a0, 0
+; CHECK-NEXT:    vshuf4i.d $vr0, $vr0, 1
+; CHECK-NEXT:    vshuf4i.h $vr0, $vr0, 27
+; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
   %va = load <8 x i16>, ptr %a
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_arr.ll b/llvm/test/CodeGen/NVPTX/mbarrier_arr.ll
new file mode 100644
index 0000000..c440caa
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_arr.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| %ptxas-verify -arch=sm_90 %}
+
+; === space_cta (addrspace 3) ===
+define void @test_mbarrier_arrive_scope_cta_space_cta(ptr addrspace(3) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_scope_cta_space_cta(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<6>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [test_mbarrier_arrive_scope_cta_space_cta_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_scope_cta_space_cta_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.arrive.shared.b64 %rd2, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive.expect_tx.shared.b64 %rd3, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.shared.b64 %rd4, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.expect_tx.shared.b64 %rd5, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_scope_cta_space_cta(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b64 %rd<5>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_scope_cta_space_cta_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [test_mbarrier_arrive_scope_cta_space_cta_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.shared.b64 %rd1, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.expect_tx.shared.b64 %rd2, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.shared.b64 %rd3, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.expect_tx.shared.b64 %rd4, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  %r1 = call i64 @llvm.nvvm.mbarrier.arrive.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  %r2 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  %r3 = call i64 @llvm.nvvm.mbarrier.arrive.drop.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  %r4 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  ret void
+}
+
+define void @test_mbarrier_arrive_scope_cluster_space_cta(ptr addrspace(3) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_scope_cluster_space_cta(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<6>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [test_mbarrier_arrive_scope_cluster_space_cta_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_scope_cluster_space_cta_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.arrive.release.cluster.shared.b64 %rd2, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive.expect_tx.release.cluster.shared.b64 %rd3, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.release.cluster.shared.b64 %rd4, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.expect_tx.release.cluster.shared.b64 %rd5, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_scope_cluster_space_cta(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b64 %rd<5>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_scope_cluster_space_cta_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [test_mbarrier_arrive_scope_cluster_space_cta_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.release.cluster.shared.b64 %rd1, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.expect_tx.release.cluster.shared.b64 %rd2, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.release.cluster.shared.b64 %rd3, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.expect_tx.release.cluster.shared.b64 %rd4, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  %r1 = call i64 @llvm.nvvm.mbarrier.arrive.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  %r2 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  %r3 = call i64 @llvm.nvvm.mbarrier.arrive.drop.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  %r4 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  ret void
+}
+
+; === space_cluster (addrspace 7) ===
+define void @test_mbarrier_arrive_scope_cta_space_cluster(ptr addrspace(7) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_scope_cta_space_cluster(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [test_mbarrier_arrive_scope_cta_space_cluster_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_scope_cta_space_cluster_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.arrive.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive.expect_tx.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.expect_tx.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_scope_cta_space_cluster(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_scope_cta_space_cluster_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [test_mbarrier_arrive_scope_cta_space_cluster_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.expect_tx.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.expect_tx.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  call void @llvm.nvvm.mbarrier.arrive.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  call void @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  call void @llvm.nvvm.mbarrier.arrive.drop.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  call void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  ret void
+}
+
+define void @test_mbarrier_arrive_scope_cluster_space_cluster(ptr addrspace(7) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_scope_cluster_space_cluster(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [test_mbarrier_arrive_scope_cluster_space_cluster_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_scope_cluster_space_cluster_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.arrive.release.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive.expect_tx.release.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.release.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.expect_tx.release.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_scope_cluster_space_cluster(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_scope_cluster_space_cluster_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [test_mbarrier_arrive_scope_cluster_space_cluster_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.release.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.expect_tx.release.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.release.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.expect_tx.release.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  call void @llvm.nvvm.mbarrier.arrive.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  call void @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  call void @llvm.nvvm.mbarrier.arrive.drop.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  call void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  ret void
+}
+
+declare i64 @llvm.nvvm.mbarrier.arrive.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cta(ptr addrspace(3), i32)
+
+declare i64 @llvm.nvvm.mbarrier.arrive.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cluster.space.cta(ptr addrspace(3), i32)
+
+declare void @llvm.nvvm.mbarrier.arrive.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cluster(ptr addrspace(7), i32)
+
+declare void @llvm.nvvm.mbarrier.arrive.scope.cluster.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cluster.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.scope.cluster.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cluster.space.cluster(ptr addrspace(7), i32)
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_arr_relaxed.ll b/llvm/test/CodeGen/NVPTX/mbarrier_arr_relaxed.ll
new file mode 100644
index 0000000..e4d2aa2
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_arr_relaxed.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86| %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86 --nvptx-short-ptr| %ptxas-verify -arch=sm_90 %}
+
+; === space_cta (addrspace 3) ===
+define void @test_mbarrier_arrive_relaxed_scope_cta_space_cta(ptr addrspace(3) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_relaxed_scope_cta_space_cta(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<6>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [test_mbarrier_arrive_relaxed_scope_cta_space_cta_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cta_space_cta_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.arrive.relaxed.cta.shared.b64 %rd2, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive.expect_tx.relaxed.cta.shared.b64 %rd3, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.relaxed.cta.shared.b64 %rd4, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.expect_tx.relaxed.cta.shared.b64 %rd5, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_relaxed_scope_cta_space_cta(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b64 %rd<5>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cta_space_cta_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [test_mbarrier_arrive_relaxed_scope_cta_space_cta_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.relaxed.cta.shared.b64 %rd1, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.expect_tx.relaxed.cta.shared.b64 %rd2, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.relaxed.cta.shared.b64 %rd3, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.expect_tx.relaxed.cta.shared.b64 %rd4, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  %r1 = call i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  %r2 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  %r3 = call i64 @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  %r4 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  ret void
+}
+
+define void @test_mbarrier_arrive_relaxed_scope_cluster_space_cta(ptr addrspace(3) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_relaxed_scope_cluster_space_cta(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<6>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [test_mbarrier_arrive_relaxed_scope_cluster_space_cta_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cluster_space_cta_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.arrive.relaxed.cluster.shared.b64 %rd2, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive.expect_tx.relaxed.cluster.shared.b64 %rd3, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.relaxed.cluster.shared.b64 %rd4, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.expect_tx.relaxed.cluster.shared.b64 %rd5, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_relaxed_scope_cluster_space_cta(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b64 %rd<5>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cluster_space_cta_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [test_mbarrier_arrive_relaxed_scope_cluster_space_cta_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.relaxed.cluster.shared.b64 %rd1, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.expect_tx.relaxed.cluster.shared.b64 %rd2, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.relaxed.cluster.shared.b64 %rd3, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.expect_tx.relaxed.cluster.shared.b64 %rd4, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  %r1 = call i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  %r2 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  %r3 = call i64 @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  %r4 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx)
+  ret void
+}
+
+; === space_cluster (addrspace 7) ===
+define void @test_mbarrier_arrive_relaxed_scope_cta_space_cluster(ptr addrspace(7) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_relaxed_scope_cta_space_cluster(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [test_mbarrier_arrive_relaxed_scope_cta_space_cluster_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cta_space_cluster_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.arrive.relaxed.cta.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive.expect_tx.relaxed.cta.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.relaxed.cta.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.expect_tx.relaxed.cta.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_relaxed_scope_cta_space_cluster(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cta_space_cluster_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [test_mbarrier_arrive_relaxed_scope_cta_space_cluster_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.relaxed.cta.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.expect_tx.relaxed.cta.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.relaxed.cta.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.expect_tx.relaxed.cta.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  call void @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  call void @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  call void @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  call void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  ret void
+}
+
+define void @test_mbarrier_arrive_relaxed_scope_cluster_space_cluster(ptr addrspace(7) %mbar, i32 %tx) {
+; CHECK-PTX64-LABEL: test_mbarrier_arrive_relaxed_scope_cluster_space_cluster(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [test_mbarrier_arrive_relaxed_scope_cluster_space_cluster_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cluster_space_cluster_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.arrive.relaxed.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive.expect_tx.relaxed.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.relaxed.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.arrive_drop.expect_tx.relaxed.cluster.shared::cluster.b64 _, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_arrive_relaxed_scope_cluster_space_cluster(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [test_mbarrier_arrive_relaxed_scope_cluster_space_cluster_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [test_mbarrier_arrive_relaxed_scope_cluster_space_cluster_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.relaxed.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive.expect_tx.relaxed.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.relaxed.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.arrive_drop.expect_tx.relaxed.cluster.shared::cluster.b64 _, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  call void @llvm.nvvm.mbarrier.arrive.relaxed.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  call void @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  call void @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  call void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx)
+  ret void
+}
+
+declare i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3), i32)
+
+declare i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cluster.space.cta(ptr addrspace(3), i32)
+
+declare void @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cluster(ptr addrspace(7), i32)
+
+declare void @llvm.nvvm.mbarrier.arrive.relaxed.scope.cluster.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cluster.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.relaxed.scope.cluster.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cluster.space.cluster(ptr addrspace(7), i32)
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_tx.ll b/llvm/test/CodeGen/NVPTX/mbarrier_tx.ll
new file mode 100644
index 0000000..441ade3
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_tx.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| %ptxas-verify -arch=sm_90 %}
+
+declare void @llvm.nvvm.mbarrier.expect.tx.scope.cta.space.cta(ptr addrspace(3), i32)
+declare void @llvm.nvvm.mbarrier.expect.tx.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare void @llvm.nvvm.mbarrier.complete.tx.scope.cta.space.cta(ptr addrspace(3), i32)
+declare void @llvm.nvvm.mbarrier.complete.tx.scope.cluster.space.cta(ptr addrspace(3), i32)
+
+declare void @llvm.nvvm.mbarrier.expect.tx.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.expect.tx.scope.cluster.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.complete.tx.scope.cta.space.cluster(ptr addrspace(7), i32)
+declare void @llvm.nvvm.mbarrier.complete.tx.scope.cluster.space.cluster(ptr addrspace(7), i32)
+
+define void @test_mbarrier_tx_space_cta(ptr addrspace(3) %mbar, i32 %tx_count) {
+; CHECK-PTX64-LABEL: test_mbarrier_tx_space_cta(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [test_mbarrier_tx_space_cta_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [test_mbarrier_tx_space_cta_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.expect_tx.relaxed.cta.shared.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.expect_tx.relaxed.cluster.shared.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.complete_tx.relaxed.cta.shared.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.complete_tx.relaxed.cluster.shared.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_tx_space_cta(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [test_mbarrier_tx_space_cta_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [test_mbarrier_tx_space_cta_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.expect_tx.relaxed.cta.shared.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.expect_tx.relaxed.cluster.shared.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.complete_tx.relaxed.cta.shared.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.complete_tx.relaxed.cluster.shared.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  tail call void @llvm.nvvm.mbarrier.expect.tx.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx_count)
+  tail call void @llvm.nvvm.mbarrier.expect.tx.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx_count)
+
+  tail call void @llvm.nvvm.mbarrier.complete.tx.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %tx_count)
+  tail call void @llvm.nvvm.mbarrier.complete.tx.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %tx_count)
+
+  ret void
+}
+
+define void @test_mbarrier_tx_space_cluster(ptr addrspace(7) %mbar, i32 %tx_count) {
+; CHECK-PTX64-LABEL: test_mbarrier_tx_space_cluster(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [test_mbarrier_tx_space_cluster_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [test_mbarrier_tx_space_cluster_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.expect_tx.relaxed.cta.shared::cluster.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.expect_tx.relaxed.cluster.shared::cluster.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.complete_tx.relaxed.cta.shared::cluster.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.complete_tx.relaxed.cluster.shared::cluster.b64 [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: test_mbarrier_tx_space_cluster(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [test_mbarrier_tx_space_cluster_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [test_mbarrier_tx_space_cluster_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.expect_tx.relaxed.cta.shared::cluster.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.expect_tx.relaxed.cluster.shared::cluster.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.complete_tx.relaxed.cta.shared::cluster.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.complete_tx.relaxed.cluster.shared::cluster.b64 [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  tail call void @llvm.nvvm.mbarrier.expect.tx.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx_count)
+  tail call void @llvm.nvvm.mbarrier.expect.tx.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx_count)
+
+  tail call void @llvm.nvvm.mbarrier.complete.tx.scope.cta.space.cluster(ptr addrspace(7) %mbar, i32 %tx_count)
+  tail call void @llvm.nvvm.mbarrier.complete.tx.scope.cluster.space.cluster(ptr addrspace(7) %mbar, i32 %tx_count)
+
+  ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm80_ptx70.ll b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm80_ptx70.ll
new file mode 100644
index 0000000..5130ae2
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm80_ptx70.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_80 && ptxas-isa-7.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70| %ptxas-verify -arch=sm_80 %}
+; RUN: %if ptxas-sm_80 && ptxas-isa-7.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 --nvptx-short-ptr| %ptxas-verify -arch=sm_80 %}
+
+declare i1 @llvm.nvvm.mbarrier.test.wait.scope.cta.space.cta(ptr addrspace(3), i64)
+
+define void @mbar_test_wait(ptr addrspace(3) %mbar, i64 %state) {
+; CHECK-PTX64-LABEL: mbar_test_wait(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .pred %p<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [mbar_test_wait_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd2, [mbar_test_wait_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.test_wait.shared.b64 %p1, [%rd1], %rd2;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_test_wait(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .pred %p<2>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [mbar_test_wait_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b64 %rd1, [mbar_test_wait_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.test_wait.shared.b64 %p1, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  %ret0 = call i1 @llvm.nvvm.mbarrier.test.wait.scope.cta.space.cta(ptr addrspace(3) %mbar, i64 %state)
+
+  ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm80_ptx71.ll b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm80_ptx71.ll
new file mode 100644
index 0000000..9327e79
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm80_ptx71.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_80 && ptxas-isa-7.1 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71| %ptxas-verify -arch=sm_80 %}
+; RUN: %if ptxas-sm_80 && ptxas-isa-7.1 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --nvptx-short-ptr| %ptxas-verify -arch=sm_80 %}
+
+; --- test.wait.parity ---
+declare i1 @llvm.nvvm.mbarrier.test.wait.parity.scope.cta.space.cta(ptr addrspace(3), i32)
+
+define void @mbar_test_wait(ptr addrspace(3) %mbar, i32 %parity) {
+; CHECK-PTX64-LABEL: mbar_test_wait(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .pred %p<2>;
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [mbar_test_wait_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [mbar_test_wait_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.test_wait.parity.shared.b64 %p1, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_test_wait(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .pred %p<2>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [mbar_test_wait_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [mbar_test_wait_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.test_wait.parity.shared.b64 %p1, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  %ret0 = call i1 @llvm.nvvm.mbarrier.test.wait.parity.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+
+  ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx78.ll b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx78.ll
new file mode 100644
index 0000000..9b19ad5
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx78.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78| %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-isa-7.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx78 --nvptx-short-ptr| %ptxas-verify -arch=sm_90 %}
+
+; --- try.wait without timelimit ---
+declare i1 @llvm.nvvm.mbarrier.try.wait.scope.cta.space.cta(ptr addrspace(3), i64)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.scope.cta.space.cta(ptr addrspace(3), i32)
+
+; --- try.wait with timelimit ---
+declare i1 @llvm.nvvm.mbarrier.try.wait.tl.scope.cta.space.cta(ptr addrspace(3), i64, i32)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.scope.cta.space.cta(ptr addrspace(3), i32, i32)
+
+define void @mbar_try_wait(ptr addrspace(3) %mbar, i64 %state, i32 %parity) {
+; CHECK-PTX64-LABEL: mbar_try_wait(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .pred %p<3>;
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [mbar_try_wait_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd2, [mbar_try_wait_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.shared.b64 %p1, [%rd1], %rd2;
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [mbar_try_wait_param_2];
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.parity.shared.b64 %p2, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_try_wait(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .pred %p<3>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [mbar_try_wait_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b64 %rd1, [mbar_try_wait_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.shared.b64 %p1, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [mbar_try_wait_param_2];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.parity.shared.b64 %p2, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  %ret0 = call i1 @llvm.nvvm.mbarrier.try.wait.scope.cta.space.cta(ptr addrspace(3) %mbar, i64 %state)
+  %ret1 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+
+  ret void
+}
+
+define void @mbar_try_wait_tl(ptr addrspace(3) %mbar, i64 %state, i32 %parity, i32 %tl) {
+; CHECK-PTX64-LABEL: mbar_try_wait_tl(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .pred %p<3>;
+; CHECK-PTX64-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [mbar_try_wait_tl_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd2, [mbar_try_wait_tl_param_1];
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [mbar_try_wait_tl_param_3];
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.shared.b64 %p1, [%rd1], %rd2, %r1;
+; CHECK-PTX64-NEXT:    ld.param.b32 %r2, [mbar_try_wait_tl_param_2];
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.parity.shared.b64 %p2, [%rd1], %r2, %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_try_wait_tl(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .pred %p<3>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<4>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [mbar_try_wait_tl_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b64 %rd1, [mbar_try_wait_tl_param_1];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [mbar_try_wait_tl_param_3];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.shared.b64 %p1, [%r1], %rd1, %r2;
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r3, [mbar_try_wait_tl_param_2];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.parity.shared.b64 %p2, [%r1], %r3, %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  %ret0 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.scope.cta.space.cta(ptr addrspace(3) %mbar, i64 %state, i32 %tl)
+  %ret1 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %parity, i32 %tl)
+
+  ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx80.ll b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx80.ll
new file mode 100644
index 0000000..034953d
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx80.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx80 --nvptx-short-ptr| %ptxas-verify -arch=sm_90 %}
+
+; with sm-90 and ptx-80, we have support for cluster-scope
+
+; --- test.wait ---
+declare i1 @llvm.nvvm.mbarrier.test.wait.parity.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i1 @llvm.nvvm.mbarrier.test.wait.scope.cluster.space.cta(ptr addrspace(3), i64)
+
+; --- try.wait without timelimit ---
+declare i1 @llvm.nvvm.mbarrier.try.wait.scope.cluster.space.cta(ptr addrspace(3), i64)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.scope.cluster.space.cta(ptr addrspace(3), i32)
+
+; --- try.wait with timelimit ---
+declare i1 @llvm.nvvm.mbarrier.try.wait.tl.scope.cluster.space.cta(ptr addrspace(3), i64, i32)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.scope.cluster.space.cta(ptr addrspace(3), i32, i32)
+
+define void @mbar_test_wait(ptr addrspace(3) %mbar, i64 %state, i32 %parity) {
+; CHECK-PTX64-LABEL: mbar_test_wait(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .pred %p<3>;
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [mbar_test_wait_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd2, [mbar_test_wait_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.test_wait.acquire.cluster.shared.b64 %p1, [%rd1], %rd2;
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [mbar_test_wait_param_2];
+; CHECK-PTX64-NEXT:    mbarrier.test_wait.parity.acquire.cluster.shared.b64 %p2, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_test_wait(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .pred %p<3>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [mbar_test_wait_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b64 %rd1, [mbar_test_wait_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.test_wait.acquire.cluster.shared.b64 %p1, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [mbar_test_wait_param_2];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.test_wait.parity.acquire.cluster.shared.b64 %p2, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  %ret0 = call i1 @llvm.nvvm.mbarrier.test.wait.scope.cluster.space.cta(ptr addrspace(3) %mbar, i64 %state)
+  %ret1 = call i1 @llvm.nvvm.mbarrier.test.wait.parity.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+
+  ret void
+}
+
+define void @mbar_try_wait(ptr addrspace(3) %mbar, i64 %state, i32 %parity) {
+; CHECK-PTX64-LABEL: mbar_try_wait(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .pred %p<3>;
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [mbar_try_wait_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd2, [mbar_try_wait_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.acquire.cluster.shared.b64 %p1, [%rd1], %rd2;
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [mbar_try_wait_param_2];
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.parity.acquire.cluster.shared.b64 %p2, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_try_wait(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .pred %p<3>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [mbar_try_wait_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b64 %rd1, [mbar_try_wait_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.acquire.cluster.shared.b64 %p1, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [mbar_try_wait_param_2];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.parity.acquire.cluster.shared.b64 %p2, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  %ret0 = call i1 @llvm.nvvm.mbarrier.try.wait.scope.cluster.space.cta(ptr addrspace(3) %mbar, i64 %state)
+  %ret1 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+
+  ret void
+}
+
+define void @mbar_try_wait_tl(ptr addrspace(3) %mbar, i64 %state, i32 %parity, i32 %tl) {
+; CHECK-PTX64-LABEL: mbar_try_wait_tl(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .pred %p<3>;
+; CHECK-PTX64-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [mbar_try_wait_tl_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd2, [mbar_try_wait_tl_param_1];
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [mbar_try_wait_tl_param_3];
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.acquire.cluster.shared.b64 %p1, [%rd1], %rd2, %r1;
+; CHECK-PTX64-NEXT:    ld.param.b32 %r2, [mbar_try_wait_tl_param_2];
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.parity.acquire.cluster.shared.b64 %p2, [%rd1], %r2, %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_try_wait_tl(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .pred %p<3>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<4>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [mbar_try_wait_tl_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b64 %rd1, [mbar_try_wait_tl_param_1];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [mbar_try_wait_tl_param_3];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.acquire.cluster.shared.b64 %p1, [%r1], %rd1, %r2;
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r3, [mbar_try_wait_tl_param_2];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.parity.acquire.cluster.shared.b64 %p2, [%r1], %r3, %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  %ret0 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.scope.cluster.space.cta(ptr addrspace(3) %mbar, i64 %state, i32 %tl)
+  %ret1 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %parity, i32 %tl)
+
+  ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx86.ll b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx86.ll
new file mode 100644
index 0000000..652634b
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/mbarrier_wait_sm90_ptx86.ll
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86| FileCheck --check-prefixes=CHECK-PTX64 %s
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86 --nvptx-short-ptr| FileCheck --check-prefixes=CHECK-PTX-SHARED32 %s
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86| %ptxas-verify -arch=sm_90 %}
+; RUN: %if ptxas-sm_90 && ptxas-isa-8.6 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx86 --nvptx-short-ptr| %ptxas-verify -arch=sm_90 %}
+
+; --- test.wait ---
+declare i1 @llvm.nvvm.mbarrier.test.wait.parity.relaxed.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i1 @llvm.nvvm.mbarrier.test.wait.parity.relaxed.scope.cluster.space.cta(ptr addrspace(3), i32)
+declare i1 @llvm.nvvm.mbarrier.test.wait.relaxed.scope.cta.space.cta(ptr addrspace(3), i64)
+declare i1 @llvm.nvvm.mbarrier.test.wait.relaxed.scope.cluster.space.cta(ptr addrspace(3), i64)
+
+; --- try.wait without timelimit ---
+declare i1 @llvm.nvvm.mbarrier.try.wait.relaxed.scope.cta.space.cta(ptr addrspace(3), i64)
+declare i1 @llvm.nvvm.mbarrier.try.wait.relaxed.scope.cluster.space.cta(ptr addrspace(3), i64)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.relaxed.scope.cta.space.cta(ptr addrspace(3), i32)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.relaxed.scope.cluster.space.cta(ptr addrspace(3), i32)
+
+; --- try.wait with timelimit ---
+declare i1 @llvm.nvvm.mbarrier.try.wait.tl.relaxed.scope.cta.space.cta(ptr addrspace(3), i64, i32)
+declare i1 @llvm.nvvm.mbarrier.try.wait.tl.relaxed.scope.cluster.space.cta(ptr addrspace(3), i64, i32)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.relaxed.scope.cta.space.cta(ptr addrspace(3), i32, i32)
+declare i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.relaxed.scope.cluster.space.cta(ptr addrspace(3), i32, i32)
+
+define void @mbar_test_wait(ptr addrspace(3) %mbar, i64 %state, i32 %parity) {
+; CHECK-PTX64-LABEL: mbar_test_wait(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .pred %p<5>;
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [mbar_test_wait_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd2, [mbar_test_wait_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.test_wait.relaxed.cta.shared.b64 %p1, [%rd1], %rd2;
+; CHECK-PTX64-NEXT:    mbarrier.test_wait.relaxed.cluster.shared.b64 %p2, [%rd1], %rd2;
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [mbar_test_wait_param_2];
+; CHECK-PTX64-NEXT:    mbarrier.test_wait.parity.relaxed.cta.shared.b64 %p3, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.test_wait.parity.relaxed.cluster.shared.b64 %p4, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_test_wait(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .pred %p<5>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [mbar_test_wait_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b64 %rd1, [mbar_test_wait_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.test_wait.relaxed.cta.shared.b64 %p1, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.test_wait.relaxed.cluster.shared.b64 %p2, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [mbar_test_wait_param_2];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.test_wait.parity.relaxed.cta.shared.b64 %p3, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.test_wait.parity.relaxed.cluster.shared.b64 %p4, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  %ret0 = call i1 @llvm.nvvm.mbarrier.test.wait.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i64 %state)
+  %ret1 = call i1 @llvm.nvvm.mbarrier.test.wait.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i64 %state)
+
+  %ret2 = call i1 @llvm.nvvm.mbarrier.test.wait.parity.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+  %ret3 = call i1 @llvm.nvvm.mbarrier.test.wait.parity.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+
+  ret void
+}
+
+define void @mbar_try_wait(ptr addrspace(3) %mbar, i64 %state, i32 %parity) {
+; CHECK-PTX64-LABEL: mbar_try_wait(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .pred %p<5>;
+; CHECK-PTX64-NEXT:    .reg .b32 %r<2>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [mbar_try_wait_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd2, [mbar_try_wait_param_1];
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.relaxed.cta.shared.b64 %p1, [%rd1], %rd2;
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.relaxed.cluster.shared.b64 %p2, [%rd1], %rd2;
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [mbar_try_wait_param_2];
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.parity.relaxed.cta.shared.b64 %p3, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.parity.relaxed.cluster.shared.b64 %p4, [%rd1], %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_try_wait(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .pred %p<5>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [mbar_try_wait_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b64 %rd1, [mbar_try_wait_param_1];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.relaxed.cta.shared.b64 %p1, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.relaxed.cluster.shared.b64 %p2, [%r1], %rd1;
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [mbar_try_wait_param_2];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.parity.relaxed.cta.shared.b64 %p3, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.parity.relaxed.cluster.shared.b64 %p4, [%r1], %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  %ret0 = call i1 @llvm.nvvm.mbarrier.try.wait.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i64 %state)
+  %ret1 = call i1 @llvm.nvvm.mbarrier.try.wait.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i64 %state)
+
+  %ret2 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+  %ret3 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %parity)
+
+  ret void
+}
+
+define void @mbar_try_wait_tl(ptr addrspace(3) %mbar, i64 %state, i32 %parity, i32 %tl) {
+; CHECK-PTX64-LABEL: mbar_try_wait_tl(
+; CHECK-PTX64:       {
+; CHECK-PTX64-NEXT:    .reg .pred %p<5>;
+; CHECK-PTX64-NEXT:    .reg .b32 %r<3>;
+; CHECK-PTX64-NEXT:    .reg .b64 %rd<3>;
+; CHECK-PTX64-EMPTY:
+; CHECK-PTX64-NEXT:  // %bb.0:
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd1, [mbar_try_wait_tl_param_0];
+; CHECK-PTX64-NEXT:    ld.param.b64 %rd2, [mbar_try_wait_tl_param_1];
+; CHECK-PTX64-NEXT:    ld.param.b32 %r1, [mbar_try_wait_tl_param_3];
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.relaxed.cta.shared.b64 %p1, [%rd1], %rd2, %r1;
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.relaxed.cluster.shared.b64 %p2, [%rd1], %rd2, %r1;
+; CHECK-PTX64-NEXT:    ld.param.b32 %r2, [mbar_try_wait_tl_param_2];
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.parity.relaxed.cta.shared.b64 %p3, [%rd1], %r2, %r1;
+; CHECK-PTX64-NEXT:    mbarrier.try_wait.parity.relaxed.cluster.shared.b64 %p4, [%rd1], %r2, %r1;
+; CHECK-PTX64-NEXT:    ret;
+;
+; CHECK-PTX-SHARED32-LABEL: mbar_try_wait_tl(
+; CHECK-PTX-SHARED32:       {
+; CHECK-PTX-SHARED32-NEXT:    .reg .pred %p<5>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b32 %r<4>;
+; CHECK-PTX-SHARED32-NEXT:    .reg .b64 %rd<2>;
+; CHECK-PTX-SHARED32-EMPTY:
+; CHECK-PTX-SHARED32-NEXT:  // %bb.0:
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r1, [mbar_try_wait_tl_param_0];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b64 %rd1, [mbar_try_wait_tl_param_1];
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r2, [mbar_try_wait_tl_param_3];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.relaxed.cta.shared.b64 %p1, [%r1], %rd1, %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.relaxed.cluster.shared.b64 %p2, [%r1], %rd1, %r2;
+; CHECK-PTX-SHARED32-NEXT:    ld.param.b32 %r3, [mbar_try_wait_tl_param_2];
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.parity.relaxed.cta.shared.b64 %p3, [%r1], %r3, %r2;
+; CHECK-PTX-SHARED32-NEXT:    mbarrier.try_wait.parity.relaxed.cluster.shared.b64 %p4, [%r1], %r3, %r2;
+; CHECK-PTX-SHARED32-NEXT:    ret;
+  %ret0 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i64 %state, i32 %tl)
+  %ret1 = call i1 @llvm.nvvm.mbarrier.try.wait.tl.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i64 %state, i32 %tl)
+
+  %ret2 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.relaxed.scope.cta.space.cta(ptr addrspace(3) %mbar, i32 %parity, i32 %tl)
+  %ret3 = call i1 @llvm.nvvm.mbarrier.try.wait.parity.tl.relaxed.scope.cluster.space.cta(ptr addrspace(3) %mbar, i32 %parity, i32 %tl)
+
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
index 4ca2dc5db..eba9faa 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-crun.ll
@@ -234,8 +234,8 @@ define dso_local void @P10_Spill_CR_UN(ptr %arg, ptr %arg1, i32 %arg2) local_unn
 ; CHECK-BE-NEXT:  # %bb.4: # %bb37
 ; CHECK-BE-NEXT:    bc 4, 4*cr5+lt, .LBB0_14
 ; CHECK-BE-NEXT:  .LBB0_5: # %bb42
-; CHECK-BE-NEXT:    addi r3, r3, global_1@toc@l
 ; CHECK-BE-NEXT:    li r4, 0
+; CHECK-BE-NEXT:    addi r3, r3, global_1@toc@l
 ; CHECK-BE-NEXT:    cmpwi r28, 0
 ; CHECK-BE-NEXT:    isel r3, r3, r4, 4*cr2+gt
 ; CHECK-BE-NEXT:    crnot 4*cr2+lt, eq
diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
index d506d20..e5d305f 100644
--- a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
@@ -1085,14 +1085,14 @@ define dso_local signext i32 @v16i8tov16i32_sign(<16 x i8> %a) local_unnamed_add
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI17_2@toc@ha
 ; PWR10BE-NEXT:    vperm v3, v2, v2, v3
 ; PWR10BE-NEXT:    addi r3, r3, .LCPI17_2@toc@l
-; PWR10BE-NEXT:    vextsb2w v3, v3
 ; PWR10BE-NEXT:    lxv v5, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI17_3@toc@ha
+; PWR10BE-NEXT:    vextsb2w v3, v3
 ; PWR10BE-NEXT:    vperm v4, v2, v2, v4
 ; PWR10BE-NEXT:    addi r3, r3, .LCPI17_3@toc@l
-; PWR10BE-NEXT:    vextsb2w v4, v4
 ; PWR10BE-NEXT:    lxv v0, 0(r3)
 ; PWR10BE-NEXT:    li r3, 0
+; PWR10BE-NEXT:    vextsb2w v4, v4
 ; PWR10BE-NEXT:    vperm v5, v2, v2, v5
 ; PWR10BE-NEXT:    vadduwm v3, v4, v3
 ; PWR10BE-NEXT:    vextsb2w v5, v5
@@ -1212,9 +1212,9 @@ define dso_local zeroext i32 @v16i8tov16i32_zero(<16 x i8> %a) local_unnamed_add
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI18_3@toc@ha
 ; PWR10BE-NEXT:    vperm v5, v4, v2, v5
 ; PWR10BE-NEXT:    addi r3, r3, .LCPI18_3@toc@l
-; PWR10BE-NEXT:    vadduwm v3, v5, v3
 ; PWR10BE-NEXT:    lxv v1, 0(r3)
 ; PWR10BE-NEXT:    li r3, 0
+; PWR10BE-NEXT:    vadduwm v3, v5, v3
 ; PWR10BE-NEXT:    vperm v0, v4, v2, v0
 ; PWR10BE-NEXT:    vperm v2, v4, v2, v1
 ; PWR10BE-NEXT:    vadduwm v2, v2, v0
@@ -1568,41 +1568,41 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 {
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI23_0@toc@ha
 ; PWR10BE-NEXT:    xxspltib v1, 255
 ; PWR10BE-NEXT:    addi r3, r3, .LCPI23_0@toc@l
-; PWR10BE-NEXT:    vsrq v1, v1, v1
 ; PWR10BE-NEXT:    lxv v3, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI23_1@toc@ha
+; PWR10BE-NEXT:    vsrq v1, v1, v1
 ; PWR10BE-NEXT:    addi r3, r3, .LCPI23_1@toc@l
 ; PWR10BE-NEXT:    vperm v1, v2, v2, v1
 ; PWR10BE-NEXT:    lxv v4, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI23_2@toc@ha
-; PWR10BE-NEXT:    vextsb2d v1, v1
 ; PWR10BE-NEXT:    vperm v3, v2, v2, v3
+; PWR10BE-NEXT:    vextsb2d v1, v1
 ; PWR10BE-NEXT:    addi r3, r3, .LCPI23_2@toc@l
-; PWR10BE-NEXT:    vextsb2d v3, v3
 ; PWR10BE-NEXT:    lxv v5, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI23_3@toc@ha
+; PWR10BE-NEXT:    vextsb2d v3, v3
 ; PWR10BE-NEXT:    vperm v4, v2, v2, v4
 ; PWR10BE-NEXT:    addi r3, r3, .LCPI23_3@toc@l
-; PWR10BE-NEXT:    vextsb2d v4, v4
 ; PWR10BE-NEXT:    lxv v0, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI23_4@toc@ha
+; PWR10BE-NEXT:    vextsb2d v4, v4
 ; PWR10BE-NEXT:    vperm v5, v2, v2, v5
 ; PWR10BE-NEXT:    addi r3, r3, .LCPI23_4@toc@l
-; PWR10BE-NEXT:    vextsb2d v5, v5
 ; PWR10BE-NEXT:    lxv v6, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI23_5@toc@ha
+; PWR10BE-NEXT:    vextsb2d v5, v5
 ; PWR10BE-NEXT:    vperm v0, v2, v2, v0
 ; PWR10BE-NEXT:    addi r3, r3, .LCPI23_5@toc@l
-; PWR10BE-NEXT:    vextsb2d v0, v0
 ; PWR10BE-NEXT:    lxv v7, 0(r3)
 ; PWR10BE-NEXT:    addis r3, r2, .LCPI23_6@toc@ha
+; PWR10BE-NEXT:    vextsb2d v0, v0
 ; PWR10BE-NEXT:    vperm v6, v2, v2, v6
+; PWR10BE-NEXT:    addi r3, r3, .LCPI23_6@toc@l
 ; PWR10BE-NEXT:    vaddudm v5, v0, v5
 ; PWR10BE-NEXT:    vaddudm v3, v4, v3
 ; PWR10BE-NEXT:    vaddudm v3, v3, v5
-; PWR10BE-NEXT:    addi r3, r3, .LCPI23_6@toc@l
-; PWR10BE-NEXT:    vextsb2d v6, v6
 ; PWR10BE-NEXT:    lxv v8, 0(r3)
+; PWR10BE-NEXT:    vextsb2d v6, v6
 ; PWR10BE-NEXT:    vperm v7, v2, v2, v7
 ; PWR10BE-NEXT:    vextsb2d v7, v7
 ; PWR10BE-NEXT:    vperm v2, v2, v2, v8
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vle.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vle.ll
new file mode 100644
index 0000000..3a74bcd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vle.ll
@@ -0,0 +1,1596 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \
+; RUN:   -global-isel -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \
+; RUN:   -global-isel -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64(
+  <vscale x 1 x i64>,
+  ptr,
+  iXLen);
+
+define <vscale x 1 x i64> @intrinsic_vle_v_nxv1i64_nxv1i64(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv1i64_nxv1i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64(
+    <vscale x 1 x i64> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vle.mask.nxv1i64(
+  <vscale x 1 x i64>,
+  ptr,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x i64> @intrinsic_vle_mask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv1i64_nxv1i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i64> @llvm.riscv.vle.mask.nxv1i64(
+    <vscale x 1 x i64> %0,
+    ptr %1,
+    <vscale x 1 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vle.nxv2i64(
+  <vscale x 2 x i64>,
+  ptr,
+  iXLen);
+
+define <vscale x 2 x i64> @intrinsic_vle_v_nxv2i64_nxv2i64(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv2i64_nxv2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i64> @llvm.riscv.vle.nxv2i64(
+    <vscale x 2 x i64> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vle.mask.nxv2i64(
+  <vscale x 2 x i64>,
+  ptr,
+  <vscale x 2 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x i64> @intrinsic_vle_mask_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv2i64_nxv2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i64> @llvm.riscv.vle.mask.nxv2i64(
+    <vscale x 2 x i64> %0,
+    ptr %1,
+    <vscale x 2 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vle.nxv4i64(
+  <vscale x 4 x i64>,
+  ptr,
+  iXLen);
+
+define <vscale x 4 x i64> @intrinsic_vle_v_nxv4i64_nxv4i64(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv4i64_nxv4i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i64> @llvm.riscv.vle.nxv4i64(
+    <vscale x 4 x i64> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vle.mask.nxv4i64(
+  <vscale x 4 x i64>,
+  ptr,
+  <vscale x 4 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x i64> @intrinsic_vle_mask_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv4i64_nxv4i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i64> @llvm.riscv.vle.mask.nxv4i64(
+    <vscale x 4 x i64> %0,
+    ptr %1,
+    <vscale x 4 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vle.nxv8i64(
+  <vscale x 8 x i64>,
+  ptr,
+  iXLen);
+
+define <vscale x 8 x i64> @intrinsic_vle_v_nxv8i64_nxv8i64(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv8i64_nxv8i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i64> @llvm.riscv.vle.nxv8i64(
+    <vscale x 8 x i64> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vle.mask.nxv8i64(
+  <vscale x 8 x i64>,
+  ptr,
+  <vscale x 8 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x i64> @intrinsic_vle_mask_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv8i64_nxv8i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i64> @llvm.riscv.vle.mask.nxv8i64(
+    <vscale x 8 x i64> %0,
+    ptr %1,
+    <vscale x 8 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vle.nxv1f64(
+  <vscale x 1 x double>,
+  ptr,
+  iXLen);
+
+define <vscale x 1 x double> @intrinsic_vle_v_nxv1f64_nxv1f64(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64(
+    <vscale x 1 x double> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vle.mask.nxv1f64(
+  <vscale x 1 x double>,
+  ptr,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x double> @intrinsic_vle_mask_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vle.mask.nxv1f64(
+    <vscale x 1 x double> %0,
+    ptr %1,
+    <vscale x 1 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vle.nxv2f64(
+  <vscale x 2 x double>,
+  ptr,
+  iXLen);
+
+define <vscale x 2 x double> @intrinsic_vle_v_nxv2f64_nxv2f64(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vle.nxv2f64(
+    <vscale x 2 x double> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vle.mask.nxv2f64(
+  <vscale x 2 x double>,
+  ptr,
+  <vscale x 2 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x double> @intrinsic_vle_mask_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vle.mask.nxv2f64(
+    <vscale x 2 x double> %0,
+    ptr %1,
+    <vscale x 2 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vle.nxv4f64(
+  <vscale x 4 x double>,
+  ptr,
+  iXLen);
+
+define <vscale x 4 x double> @intrinsic_vle_v_nxv4f64_nxv4f64(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vle.nxv4f64(
+    <vscale x 4 x double> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vle.mask.nxv4f64(
+  <vscale x 4 x double>,
+  ptr,
+  <vscale x 4 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x double> @intrinsic_vle_mask_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vle.mask.nxv4f64(
+    <vscale x 4 x double> %0,
+    ptr %1,
+    <vscale x 4 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vle.nxv8f64(
+  <vscale x 8 x double>,
+  ptr,
+  iXLen);
+
+define <vscale x 8 x double> @intrinsic_vle_v_nxv8f64_nxv8f64(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vle.nxv8f64(
+    <vscale x 8 x double> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 8 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vle.mask.nxv8f64(
+  <vscale x 8 x double>,
+  ptr,
+  <vscale x 8 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x double> @intrinsic_vle_mask_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vle.mask.nxv8f64(
+    <vscale x 8 x double> %0,
+    ptr %1,
+    <vscale x 8 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x double> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32(
+  <vscale x 1 x i32>,
+  ptr,
+  iXLen);
+
+define <vscale x 1 x i32> @intrinsic_vle_v_nxv1i32_nxv1i32(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv1i32_nxv1i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32(
+    <vscale x 1 x i32> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vle.mask.nxv1i32(
+  <vscale x 1 x i32>,
+  ptr,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x i32> @intrinsic_vle_mask_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv1i32_nxv1i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i32> @llvm.riscv.vle.mask.nxv1i32(
+    <vscale x 1 x i32> %0,
+    ptr %1,
+    <vscale x 1 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(
+  <vscale x 2 x i32>,
+  ptr,
+  iXLen);
+
+define <vscale x 2 x i32> @intrinsic_vle_v_nxv2i32_nxv2i32(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv2i32_nxv2i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(
+    <vscale x 2 x i32> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vle.mask.nxv2i32(
+  <vscale x 2 x i32>,
+  ptr,
+  <vscale x 2 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x i32> @intrinsic_vle_mask_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv2i32_nxv2i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i32> @llvm.riscv.vle.mask.nxv2i32(
+    <vscale x 2 x i32> %0,
+    ptr %1,
+    <vscale x 2 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32(
+  <vscale x 4 x i32>,
+  ptr,
+  iXLen);
+
+define <vscale x 4 x i32> @intrinsic_vle_v_nxv4i32_nxv4i32(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv4i32_nxv4i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32(
+    <vscale x 4 x i32> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vle.mask.nxv4i32(
+  <vscale x 4 x i32>,
+  ptr,
+  <vscale x 4 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x i32> @intrinsic_vle_mask_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv4i32_nxv4i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i32> @llvm.riscv.vle.mask.nxv4i32(
+    <vscale x 4 x i32> %0,
+    ptr %1,
+    <vscale x 4 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vle.nxv8i32(
+  <vscale x 8 x i32>,
+  ptr,
+  iXLen);
+
+define <vscale x 8 x i32> @intrinsic_vle_v_nxv8i32_nxv8i32(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv8i32_nxv8i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i32> @llvm.riscv.vle.nxv8i32(
+    <vscale x 8 x i32> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vle.mask.nxv8i32(
+  <vscale x 8 x i32>,
+  ptr,
+  <vscale x 8 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x i32> @intrinsic_vle_mask_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv8i32_nxv8i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i32> @llvm.riscv.vle.mask.nxv8i32(
+    <vscale x 8 x i32> %0,
+    ptr %1,
+    <vscale x 8 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32(
+  <vscale x 16 x i32>,
+  ptr,
+  iXLen);
+
+define <vscale x 16 x i32> @intrinsic_vle_v_nxv16i32_nxv16i32(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv16i32_nxv16i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32(
+    <vscale x 16 x i32> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vle.mask.nxv16i32(
+  <vscale x 16 x i32>,
+  ptr,
+  <vscale x 16 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x i32> @intrinsic_vle_mask_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv16i32_nxv16i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x i32> @llvm.riscv.vle.mask.nxv16i32(
+    <vscale x 16 x i32> %0,
+    ptr %1,
+    <vscale x 16 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vle.nxv1f32(
+  <vscale x 1 x float>,
+  ptr,
+  iXLen);
+
+define <vscale x 1 x float> @intrinsic_vle_v_nxv1f32_nxv1f32(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vle.nxv1f32(
+    <vscale x 1 x float> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vle.mask.nxv1f32(
+  <vscale x 1 x float>,
+  ptr,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x float> @intrinsic_vle_mask_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vle.mask.nxv1f32(
+    <vscale x 1 x float> %0,
+    ptr %1,
+    <vscale x 1 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vle.nxv2f32(
+  <vscale x 2 x float>,
+  ptr,
+  iXLen);
+
+define <vscale x 2 x float> @intrinsic_vle_v_nxv2f32_nxv2f32(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32(
+    <vscale x 2 x float> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vle.mask.nxv2f32(
+  <vscale x 2 x float>,
+  ptr,
+  <vscale x 2 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x float> @intrinsic_vle_mask_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vle.mask.nxv2f32(
+    <vscale x 2 x float> %0,
+    ptr %1,
+    <vscale x 2 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vle.nxv4f32(
+  <vscale x 4 x float>,
+  ptr,
+  iXLen);
+
+define <vscale x 4 x float> @intrinsic_vle_v_nxv4f32_nxv4f32(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vle.nxv4f32(
+    <vscale x 4 x float> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vle.mask.nxv4f32(
+  <vscale x 4 x float>,
+  ptr,
+  <vscale x 4 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x float> @intrinsic_vle_mask_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vle.mask.nxv4f32(
+    <vscale x 4 x float> %0,
+    ptr %1,
+    <vscale x 4 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vle.nxv8f32(
+  <vscale x 8 x float>,
+  ptr,
+  iXLen);
+
+define <vscale x 8 x float> @intrinsic_vle_v_nxv8f32_nxv8f32(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vle.nxv8f32(
+    <vscale x 8 x float> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vle.mask.nxv8f32(
+  <vscale x 8 x float>,
+  ptr,
+  <vscale x 8 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x float> @intrinsic_vle_mask_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vle.mask.nxv8f32(
+    <vscale x 8 x float> %0,
+    ptr %1,
+    <vscale x 8 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vle.nxv16f32(
+  <vscale x 16 x float>,
+  ptr,
+  iXLen);
+
+define <vscale x 16 x float> @intrinsic_vle_v_nxv16f32_nxv16f32(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32(
+    <vscale x 16 x float> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vle.mask.nxv16f32(
+  <vscale x 16 x float>,
+  ptr,
+  <vscale x 16 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x float> @intrinsic_vle_mask_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vle.mask.nxv16f32(
+    <vscale x 16 x float> %0,
+    ptr %1,
+    <vscale x 16 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vle.nxv1i16(
+  <vscale x 1 x i16>,
+  ptr,
+  iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vle_v_nxv1i16_nxv1i16(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv1i16_nxv1i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vle.nxv1i16(
+    <vscale x 1 x i16> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vle.mask.nxv1i16(
+  <vscale x 1 x i16>,
+  ptr,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vle_mask_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv1i16_nxv1i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vle.mask.nxv1i16(
+    <vscale x 1 x i16> %0,
+    ptr %1,
+    <vscale x 1 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(
+  <vscale x 2 x i16>,
+  ptr,
+  iXLen);
+
+define <vscale x 2 x i16> @intrinsic_vle_v_nxv2i16_nxv2i16(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv2i16_nxv2i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(
+    <vscale x 2 x i16> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vle.mask.nxv2i16(
+  <vscale x 2 x i16>,
+  ptr,
+  <vscale x 2 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x i16> @intrinsic_vle_mask_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv2i16_nxv2i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i16> @llvm.riscv.vle.mask.nxv2i16(
+    <vscale x 2 x i16> %0,
+    ptr %1,
+    <vscale x 2 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vle.nxv4i16(
+  <vscale x 4 x i16>,
+  ptr,
+  iXLen);
+
+define <vscale x 4 x i16> @intrinsic_vle_v_nxv4i16_nxv4i16(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv4i16_nxv4i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i16> @llvm.riscv.vle.nxv4i16(
+    <vscale x 4 x i16> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vle.mask.nxv4i16(
+  <vscale x 4 x i16>,
+  ptr,
+  <vscale x 4 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x i16> @intrinsic_vle_mask_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv4i16_nxv4i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i16> @llvm.riscv.vle.mask.nxv4i16(
+    <vscale x 4 x i16> %0,
+    ptr %1,
+    <vscale x 4 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vle.nxv8i16(
+  <vscale x 8 x i16>,
+  ptr,
+  iXLen);
+
+define <vscale x 8 x i16> @intrinsic_vle_v_nxv8i16_nxv8i16(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv8i16_nxv8i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i16> @llvm.riscv.vle.nxv8i16(
+    <vscale x 8 x i16> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vle.mask.nxv8i16(
+  <vscale x 8 x i16>,
+  ptr,
+  <vscale x 8 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x i16> @intrinsic_vle_mask_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv8i16_nxv8i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i16> @llvm.riscv.vle.mask.nxv8i16(
+    <vscale x 8 x i16> %0,
+    ptr %1,
+    <vscale x 8 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vle.nxv16i16(
+  <vscale x 16 x i16>,
+  ptr,
+  iXLen);
+
+define <vscale x 16 x i16> @intrinsic_vle_v_nxv16i16_nxv16i16(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv16i16_nxv16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x i16> @llvm.riscv.vle.nxv16i16(
+    <vscale x 16 x i16> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vle.mask.nxv16i16(
+  <vscale x 16 x i16>,
+  ptr,
+  <vscale x 16 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x i16> @intrinsic_vle_mask_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv16i16_nxv16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x i16> @llvm.riscv.vle.mask.nxv16i16(
+    <vscale x 16 x i16> %0,
+    ptr %1,
+    <vscale x 16 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16(
+  <vscale x 32 x i16>,
+  ptr,
+  iXLen);
+
+define <vscale x 32 x i16> @intrinsic_vle_v_nxv32i16_nxv32i16(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv32i16_nxv32i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16(
+    <vscale x 32 x i16> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vle.mask.nxv32i16(
+  <vscale x 32 x i16>,
+  ptr,
+  <vscale x 32 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 32 x i16> @intrinsic_vle_mask_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv32i16_nxv32i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x i16> @llvm.riscv.vle.mask.nxv32i16(
+    <vscale x 32 x i16> %0,
+    ptr %1,
+    <vscale x 32 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vle.nxv1f16(
+  <vscale x 1 x half>,
+  ptr,
+  iXLen);
+
+define <vscale x 1 x half> @intrinsic_vle_v_nxv1f16_nxv1f16(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vle.nxv1f16(
+    <vscale x 1 x half> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vle.mask.nxv1f16(
+  <vscale x 1 x half>,
+  ptr,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x half> @intrinsic_vle_mask_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vle.mask.nxv1f16(
+    <vscale x 1 x half> %0,
+    ptr %1,
+    <vscale x 1 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vle.nxv2f16(
+  <vscale x 2 x half>,
+  ptr,
+  iXLen);
+
+define <vscale x 2 x half> @intrinsic_vle_v_nxv2f16_nxv2f16(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vle.nxv2f16(
+    <vscale x 2 x half> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vle.mask.nxv2f16(
+  <vscale x 2 x half>,
+  ptr,
+  <vscale x 2 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x half> @intrinsic_vle_mask_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vle.mask.nxv2f16(
+    <vscale x 2 x half> %0,
+    ptr %1,
+    <vscale x 2 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vle.nxv4f16(
+  <vscale x 4 x half>,
+  ptr,
+  iXLen);
+
+define <vscale x 4 x half> @intrinsic_vle_v_nxv4f16_nxv4f16(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vle.nxv4f16(
+    <vscale x 4 x half> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vle.mask.nxv4f16(
+  <vscale x 4 x half>,
+  ptr,
+  <vscale x 4 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x half> @intrinsic_vle_mask_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vle.mask.nxv4f16(
+    <vscale x 4 x half> %0,
+    ptr %1,
+    <vscale x 4 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vle.nxv8f16(
+  <vscale x 8 x half>,
+  ptr,
+  iXLen);
+
+define <vscale x 8 x half> @intrinsic_vle_v_nxv8f16_nxv8f16(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vle.nxv8f16(
+    <vscale x 8 x half> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vle.mask.nxv8f16(
+  <vscale x 8 x half>,
+  ptr,
+  <vscale x 8 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x half> @intrinsic_vle_mask_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vle.mask.nxv8f16(
+    <vscale x 8 x half> %0,
+    ptr %1,
+    <vscale x 8 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vle.nxv16f16(
+  <vscale x 16 x half>,
+  ptr,
+  iXLen);
+
+define <vscale x 16 x half> @intrinsic_vle_v_nxv16f16_nxv16f16(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vle.nxv16f16(
+    <vscale x 16 x half> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vle.mask.nxv16f16(
+  <vscale x 16 x half>,
+  ptr,
+  <vscale x 16 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x half> @intrinsic_vle_mask_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vle.mask.nxv16f16(
+    <vscale x 16 x half> %0,
+    ptr %1,
+    <vscale x 16 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vle.nxv32f16(
+  <vscale x 32 x half>,
+  ptr,
+  iXLen);
+
+define <vscale x 32 x half> @intrinsic_vle_v_nxv32f16_nxv32f16(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vle.nxv32f16(
+    <vscale x 32 x half> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vle.mask.nxv32f16(
+  <vscale x 32 x half>,
+  ptr,
+  <vscale x 32 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 32 x half> @intrinsic_vle_mask_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, ptr %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vle.mask.nxv32f16(
+    <vscale x 32 x half> %0,
+    ptr %1,
+    <vscale x 32 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8(
+  <vscale x 1 x i8>,
+  ptr,
+  iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vle_v_nxv1i8_nxv1i8(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv1i8_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8(
+    <vscale x 1 x i8> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vle.mask.nxv1i8(
+  <vscale x 1 x i8>,
+  ptr,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vle_mask_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv1i8_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vle.mask.nxv1i8(
+    <vscale x 1 x i8> %0,
+    ptr %1,
+    <vscale x 1 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vle.nxv2i8(
+  <vscale x 2 x i8>,
+  ptr,
+  iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vle_v_nxv2i8_nxv2i8(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv2i8_nxv2i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i8> @llvm.riscv.vle.nxv2i8(
+    <vscale x 2 x i8> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vle.mask.nxv2i8(
+  <vscale x 2 x i8>,
+  ptr,
+  <vscale x 2 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vle_mask_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv2i8_nxv2i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i8> @llvm.riscv.vle.mask.nxv2i8(
+    <vscale x 2 x i8> %0,
+    ptr %1,
+    <vscale x 2 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vle.nxv4i8(
+  <vscale x 4 x i8>,
+  ptr,
+  iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vle_v_nxv4i8_nxv4i8(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv4i8_nxv4i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i8> @llvm.riscv.vle.nxv4i8(
+    <vscale x 4 x i8> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vle.mask.nxv4i8(
+  <vscale x 4 x i8>,
+  ptr,
+  <vscale x 4 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vle_mask_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv4i8_nxv4i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i8> @llvm.riscv.vle.mask.nxv4i8(
+    <vscale x 4 x i8> %0,
+    ptr %1,
+    <vscale x 4 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8(
+  <vscale x 8 x i8>,
+  ptr,
+  iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vle_v_nxv8i8_nxv8i8(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv8i8_nxv8i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8(
+    <vscale x 8 x i8> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vle.mask.nxv8i8(
+  <vscale x 8 x i8>,
+  ptr,
+  <vscale x 8 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vle_mask_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv8i8_nxv8i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i8> @llvm.riscv.vle.mask.nxv8i8(
+    <vscale x 8 x i8> %0,
+    ptr %1,
+    <vscale x 8 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vle.nxv16i8(
+  <vscale x 16 x i8>,
+  ptr,
+  iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vle_v_nxv16i8_nxv16i8(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv16i8_nxv16i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x i8> @llvm.riscv.vle.nxv16i8(
+    <vscale x 16 x i8> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vle.mask.nxv16i8(
+  <vscale x 16 x i8>,
+  ptr,
+  <vscale x 16 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vle_mask_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv16i8_nxv16i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x i8> @llvm.riscv.vle.mask.nxv16i8(
+    <vscale x 16 x i8> %0,
+    ptr %1,
+    <vscale x 16 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vle.nxv32i8(
+  <vscale x 32 x i8>,
+  ptr,
+  iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vle_v_nxv32i8_nxv32i8(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv32i8_nxv32i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x i8> @llvm.riscv.vle.nxv32i8(
+    <vscale x 32 x i8> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vle.mask.nxv32i8(
+  <vscale x 32 x i8>,
+  ptr,
+  <vscale x 32 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vle_mask_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv32i8_nxv32i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x i8> @llvm.riscv.vle.mask.nxv32i8(
+    <vscale x 32 x i8> %0,
+    ptr %1,
+    <vscale x 32 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8(
+  <vscale x 64 x i8>,
+  ptr,
+  iXLen);
+
+define <vscale x 64 x i8> @intrinsic_vle_v_nxv64i8_nxv64i8(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vle_v_nxv64i8_nxv64i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8(
+    <vscale x 64 x i8> poison,
+    ptr %0,
+    iXLen %1)
+
+  ret <vscale x 64 x i8> %a
+}
+
+declare <vscale x 64 x i8> @llvm.riscv.vle.mask.nxv64i8(
+  <vscale x 64 x i8>,
+  ptr,
+  <vscale x 64 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 64 x i8> @intrinsic_vle_mask_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, <vscale x 64 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vle_mask_v_nxv64i8_nxv64i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 64 x i8> @llvm.riscv.vle.mask.nxv64i8(
+    <vscale x 64 x i8> %0,
+    ptr %1,
+    <vscale x 64 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 64 x i8> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vlm.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vlm.ll
new file mode 100644
index 0000000..1227963
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vlm.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -global-isel -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -global-isel -verify-machineinstrs | FileCheck %s
+
+declare <vscale x 1 x i1> @llvm.riscv.vlm.nxv1i1(ptr, iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vlm_v_nxv1i1(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vlm_v_nxv1i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i1> @llvm.riscv.vlm.nxv1i1(ptr %0, iXLen %1)
+  ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1(ptr, iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vlm_v_nxv2i1(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vlm_v_nxv2i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1(ptr %0, iXLen %1)
+  ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vlm.nxv4i1(ptr, iXLen);
+
+define <vscale x 4 x i1> @intrinsic_vlm_v_nxv4i1(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vlm_v_nxv4i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i1> @llvm.riscv.vlm.nxv4i1(ptr %0, iXLen %1)
+  ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vlm.nxv8i1(ptr, iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vlm_v_nxv8i1(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vlm_v_nxv8i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i1> @llvm.riscv.vlm.nxv8i1(ptr %0, iXLen %1)
+  ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vlm.nxv16i1(ptr, iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vlm_v_nxv16i1(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vlm_v_nxv16i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x i1> @llvm.riscv.vlm.nxv16i1(ptr %0, iXLen %1)
+  ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 32 x i1> @llvm.riscv.vlm.nxv32i1(ptr, iXLen);
+
+define <vscale x 32 x i1> @intrinsic_vlm_v_nxv32i1(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vlm_v_nxv32i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x i1> @llvm.riscv.vlm.nxv32i1(ptr %0, iXLen %1)
+  ret <vscale x 32 x i1> %a
+}
+
+declare <vscale x 64 x i1> @llvm.riscv.vlm.nxv64i1(ptr, iXLen);
+
+define <vscale x 64 x i1> @intrinsic_vlm_v_nxv64i1(ptr %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vlm_v_nxv64i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 64 x i1> @llvm.riscv.vlm.nxv64i1(ptr %0, iXLen %1)
+  ret <vscale x 64 x i1> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vlse.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vlse.ll
new file mode 100644
index 0000000..14abfa1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/vlse.ll
@@ -0,0 +1,1744 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin \
+; RUN:   -global-isel -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin \
+; RUN:   -global-isel -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i64> @llvm.riscv.vlse.nxv1i64(
+  <vscale x 1 x i64>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x i64> @intrinsic_vlse_v_nxv1i64_nxv1i64(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv1i64_nxv1i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i64> @llvm.riscv.vlse.nxv1i64(
+    <vscale x 1 x i64> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vlse.mask.nxv1i64(
+  <vscale x 1 x i64>,
+  ptr,
+  iXLen,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x i64> @intrinsic_vlse_mask_v_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv1i64_nxv1i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i64> @llvm.riscv.vlse.mask.nxv1i64(
+    <vscale x 1 x i64> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 1 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vlse.nxv2i64(
+  <vscale x 2 x i64>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x i64> @intrinsic_vlse_v_nxv2i64_nxv2i64(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv2i64_nxv2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i64> @llvm.riscv.vlse.nxv2i64(
+    <vscale x 2 x i64> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vlse.mask.nxv2i64(
+  <vscale x 2 x i64>,
+  ptr,
+  iXLen,
+  <vscale x 2 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x i64> @intrinsic_vlse_mask_v_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv2i64_nxv2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i64> @llvm.riscv.vlse.mask.nxv2i64(
+    <vscale x 2 x i64> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 2 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vlse.nxv4i64(
+  <vscale x 4 x i64>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x i64> @intrinsic_vlse_v_nxv4i64_nxv4i64(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv4i64_nxv4i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i64> @llvm.riscv.vlse.nxv4i64(
+    <vscale x 4 x i64> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vlse.mask.nxv4i64(
+  <vscale x 4 x i64>,
+  ptr,
+  iXLen,
+  <vscale x 4 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x i64> @intrinsic_vlse_mask_v_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv4i64_nxv4i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i64> @llvm.riscv.vlse.mask.nxv4i64(
+    <vscale x 4 x i64> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 4 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vlse.nxv8i64(
+  <vscale x 8 x i64>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x i64> @intrinsic_vlse_v_nxv8i64_nxv8i64(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv8i64_nxv8i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i64> @llvm.riscv.vlse.nxv8i64(
+    <vscale x 8 x i64> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vlse.mask.nxv8i64(
+  <vscale x 8 x i64>,
+  ptr,
+  iXLen,
+  <vscale x 8 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x i64> @intrinsic_vlse_mask_v_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv8i64_nxv8i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i64> @llvm.riscv.vlse.mask.nxv8i64(
+    <vscale x 8 x i64> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 8 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vlse.nxv1f64(
+  <vscale x 1 x double>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x double> @intrinsic_vlse_v_nxv1f64_nxv1f64(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
+; CHECK-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vlse.nxv1f64(
+    <vscale x 1 x double> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vlse.mask.nxv1f64(
+  <vscale x 1 x double>,
+  ptr,
+  iXLen,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x double> @intrinsic_vlse_mask_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv1f64_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vlse.mask.nxv1f64(
+    <vscale x 1 x double> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 1 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vlse.nxv2f64(
+  <vscale x 2 x double>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x double> @intrinsic_vlse_v_nxv2f64_nxv2f64(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
+; CHECK-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vlse.nxv2f64(
+    <vscale x 2 x double> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vlse.mask.nxv2f64(
+  <vscale x 2 x double>,
+  ptr,
+  iXLen,
+  <vscale x 2 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x double> @intrinsic_vlse_mask_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv2f64_nxv2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x double> @llvm.riscv.vlse.mask.nxv2f64(
+    <vscale x 2 x double> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 2 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vlse.nxv4f64(
+  <vscale x 4 x double>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x double> @intrinsic_vlse_v_nxv4f64_nxv4f64(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
+; CHECK-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vlse.nxv4f64(
+    <vscale x 4 x double> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vlse.mask.nxv4f64(
+  <vscale x 4 x double>,
+  ptr,
+  iXLen,
+  <vscale x 4 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x double> @intrinsic_vlse_mask_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv4f64_nxv4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x double> @llvm.riscv.vlse.mask.nxv4f64(
+    <vscale x 4 x double> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 4 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vlse.nxv8f64(
+  <vscale x 8 x double>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x double> @intrinsic_vlse_v_nxv8f64_nxv8f64(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vlse.nxv8f64(
+    <vscale x 8 x double> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 8 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vlse.mask.nxv8f64(
+  <vscale x 8 x double>,
+  ptr,
+  iXLen,
+  <vscale x 8 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x double> @intrinsic_vlse_mask_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv8f64_nxv8f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x double> @llvm.riscv.vlse.mask.nxv8f64(
+    <vscale x 8 x double> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 8 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 8 x double> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vlse.nxv1i32(
+  <vscale x 1 x i32>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x i32> @intrinsic_vlse_v_nxv1i32_nxv1i32(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv1i32_nxv1i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-NEXT:    vlse32.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i32> @llvm.riscv.vlse.nxv1i32(
+    <vscale x 1 x i32> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 1 x i32> @llvm.riscv.vlse.mask.nxv1i32(
+  <vscale x 1 x i32>,
+  ptr,
+  iXLen,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x i32> @intrinsic_vlse_mask_v_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv1i32_nxv1i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, mu
+; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i32> @llvm.riscv.vlse.mask.nxv1i32(
+    <vscale x 1 x i32> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 1 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 1 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(
+  <vscale x 2 x i32>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x i32> @intrinsic_vlse_v_nxv2i32_nxv2i32(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv2i32_nxv2i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT:    vlse32.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(
+    <vscale x 2 x i32> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 2 x i32> @llvm.riscv.vlse.mask.nxv2i32(
+  <vscale x 2 x i32>,
+  ptr,
+  iXLen,
+  <vscale x 2 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x i32> @intrinsic_vlse_mask_v_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv2i32_nxv2i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i32> @llvm.riscv.vlse.mask.nxv2i32(
+    <vscale x 2 x i32> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 2 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 2 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vlse.nxv4i32(
+  <vscale x 4 x i32>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x i32> @intrinsic_vlse_v_nxv4i32_nxv4i32(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv4i32_nxv4i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-NEXT:    vlse32.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i32> @llvm.riscv.vlse.nxv4i32(
+    <vscale x 4 x i32> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 4 x i32> @llvm.riscv.vlse.mask.nxv4i32(
+  <vscale x 4 x i32>,
+  ptr,
+  iXLen,
+  <vscale x 4 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x i32> @intrinsic_vlse_mask_v_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv4i32_nxv4i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, mu
+; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i32> @llvm.riscv.vlse.mask.nxv4i32(
+    <vscale x 4 x i32> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 4 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 4 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vlse.nxv8i32(
+  <vscale x 8 x i32>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x i32> @intrinsic_vlse_v_nxv8i32_nxv8i32(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv8i32_nxv8i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-NEXT:    vlse32.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i32> @llvm.riscv.vlse.nxv8i32(
+    <vscale x 8 x i32> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 8 x i32> @llvm.riscv.vlse.mask.nxv8i32(
+  <vscale x 8 x i32>,
+  ptr,
+  iXLen,
+  <vscale x 8 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x i32> @intrinsic_vlse_mask_v_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv8i32_nxv8i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, mu
+; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i32> @llvm.riscv.vlse.mask.nxv8i32(
+    <vscale x 8 x i32> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 8 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 8 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vlse.nxv16i32(
+  <vscale x 16 x i32>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x i32> @intrinsic_vlse_v_nxv16i32_nxv16i32(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv16i32_nxv16i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vlse32.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x i32> @llvm.riscv.vlse.nxv16i32(
+    <vscale x 16 x i32> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.vlse.mask.nxv16i32(
+  <vscale x 16 x i32>,
+  ptr,
+  iXLen,
+  <vscale x 16 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x i32> @intrinsic_vlse_mask_v_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv16i32_nxv16i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
+; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x i32> @llvm.riscv.vlse.mask.nxv16i32(
+    <vscale x 16 x i32> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 16 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 16 x i32> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vlse.nxv1f32(
+  <vscale x 1 x float>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x float> @intrinsic_vlse_v_nxv1f32_nxv1f32(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, ma
+; CHECK-NEXT:    vlse32.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vlse.nxv1f32(
+    <vscale x 1 x float> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vlse.mask.nxv1f32(
+  <vscale x 1 x float>,
+  ptr,
+  iXLen,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x float> @intrinsic_vlse_mask_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv1f32_nxv1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, mu
+; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vlse.mask.nxv1f32(
+    <vscale x 1 x float> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 1 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vlse.nxv2f32(
+  <vscale x 2 x float>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x float> @intrinsic_vlse_v_nxv2f32_nxv2f32(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT:    vlse32.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vlse.nxv2f32(
+    <vscale x 2 x float> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vlse.mask.nxv2f32(
+  <vscale x 2 x float>,
+  ptr,
+  iXLen,
+  <vscale x 2 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x float> @intrinsic_vlse_mask_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv2f32_nxv2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vlse.mask.nxv2f32(
+    <vscale x 2 x float> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 2 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vlse.nxv4f32(
+  <vscale x 4 x float>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x float> @intrinsic_vlse_v_nxv4f32_nxv4f32(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
+; CHECK-NEXT:    vlse32.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vlse.nxv4f32(
+    <vscale x 4 x float> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vlse.mask.nxv4f32(
+  <vscale x 4 x float>,
+  ptr,
+  iXLen,
+  <vscale x 4 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x float> @intrinsic_vlse_mask_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv4f32_nxv4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, mu
+; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vlse.mask.nxv4f32(
+    <vscale x 4 x float> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 4 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vlse.nxv8f32(
+  <vscale x 8 x float>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x float> @intrinsic_vlse_v_nxv8f32_nxv8f32(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, ma
+; CHECK-NEXT:    vlse32.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vlse.nxv8f32(
+    <vscale x 8 x float> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vlse.mask.nxv8f32(
+  <vscale x 8 x float>,
+  ptr,
+  iXLen,
+  <vscale x 8 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x float> @intrinsic_vlse_mask_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv8f32_nxv8f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m4, ta, mu
+; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vlse.mask.nxv8f32(
+    <vscale x 8 x float> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 8 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vlse.nxv16f32(
+  <vscale x 16 x float>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x float> @intrinsic_vlse_v_nxv16f32_nxv16f32(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vlse32.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vlse.nxv16f32(
+    <vscale x 16 x float> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vlse.mask.nxv16f32(
+  <vscale x 16 x float>,
+  ptr,
+  iXLen,
+  <vscale x 16 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x float> @intrinsic_vlse_mask_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv16f32_nxv16f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
+; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vlse.mask.nxv16f32(
+    <vscale x 16 x float> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 16 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vlse.nxv1i16(
+  <vscale x 1 x i16>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vlse_v_nxv1i16_nxv1i16(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv1i16_nxv1i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vlse.nxv1i16(
+    <vscale x 1 x i16> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 1 x i16> @llvm.riscv.vlse.mask.nxv1i16(
+  <vscale x 1 x i16>,
+  ptr,
+  iXLen,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x i16> @intrinsic_vlse_mask_v_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv1i16_nxv1i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, mu
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i16> @llvm.riscv.vlse.mask.nxv1i16(
+    <vscale x 1 x i16> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 1 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 1 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vlse.nxv2i16(
+  <vscale x 2 x i16>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x i16> @intrinsic_vlse_v_nxv2i16_nxv2i16(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv2i16_nxv2i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i16> @llvm.riscv.vlse.nxv2i16(
+    <vscale x 2 x i16> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 2 x i16> @llvm.riscv.vlse.mask.nxv2i16(
+  <vscale x 2 x i16>,
+  ptr,
+  iXLen,
+  <vscale x 2 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x i16> @intrinsic_vlse_mask_v_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv2i16_nxv2i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, mu
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i16> @llvm.riscv.vlse.mask.nxv2i16(
+    <vscale x 2 x i16> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 2 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 2 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vlse.nxv4i16(
+  <vscale x 4 x i16>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x i16> @intrinsic_vlse_v_nxv4i16_nxv4i16(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv4i16_nxv4i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i16> @llvm.riscv.vlse.nxv4i16(
+    <vscale x 4 x i16> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 4 x i16> @llvm.riscv.vlse.mask.nxv4i16(
+  <vscale x 4 x i16>,
+  ptr,
+  iXLen,
+  <vscale x 4 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x i16> @intrinsic_vlse_mask_v_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv4i16_nxv4i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, mu
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i16> @llvm.riscv.vlse.mask.nxv4i16(
+    <vscale x 4 x i16> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 4 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 4 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vlse.nxv8i16(
+  <vscale x 8 x i16>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x i16> @intrinsic_vlse_v_nxv8i16_nxv8i16(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv8i16_nxv8i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i16> @llvm.riscv.vlse.nxv8i16(
+    <vscale x 8 x i16> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 8 x i16> @llvm.riscv.vlse.mask.nxv8i16(
+  <vscale x 8 x i16>,
+  ptr,
+  iXLen,
+  <vscale x 8 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x i16> @intrinsic_vlse_mask_v_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv8i16_nxv8i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m2, ta, mu
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i16> @llvm.riscv.vlse.mask.nxv8i16(
+    <vscale x 8 x i16> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 8 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 8 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vlse.nxv16i16(
+  <vscale x 16 x i16>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x i16> @intrinsic_vlse_v_nxv16i16_nxv16i16(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv16i16_nxv16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x i16> @llvm.riscv.vlse.nxv16i16(
+    <vscale x 16 x i16> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 16 x i16> @llvm.riscv.vlse.mask.nxv16i16(
+  <vscale x 16 x i16>,
+  ptr,
+  iXLen,
+  <vscale x 16 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x i16> @intrinsic_vlse_mask_v_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv16i16_nxv16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, mu
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x i16> @llvm.riscv.vlse.mask.nxv16i16(
+    <vscale x 16 x i16> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 16 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 16 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vlse.nxv32i16(
+  <vscale x 32 x i16>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 32 x i16> @intrinsic_vlse_v_nxv32i16_nxv32i16(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv32i16_nxv32i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x i16> @llvm.riscv.vlse.nxv32i16(
+    <vscale x 32 x i16> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.vlse.mask.nxv32i16(
+  <vscale x 32 x i16>,
+  ptr,
+  iXLen,
+  <vscale x 32 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 32 x i16> @intrinsic_vlse_mask_v_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, ptr %1, iXLen %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv32i16_nxv32i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, mu
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x i16> @llvm.riscv.vlse.mask.nxv32i16(
+    <vscale x 32 x i16> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 32 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 32 x i16> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vlse.nxv1f16(
+  <vscale x 1 x half>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x half> @intrinsic_vlse_v_nxv1f16_nxv1f16(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vlse.nxv1f16(
+    <vscale x 1 x half> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vlse.mask.nxv1f16(
+  <vscale x 1 x half>,
+  ptr,
+  iXLen,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x half> @intrinsic_vlse_mask_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv1f16_nxv1f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, mu
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x half> @llvm.riscv.vlse.mask.nxv1f16(
+    <vscale x 1 x half> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 1 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vlse.nxv2f16(
+  <vscale x 2 x half>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x half> @intrinsic_vlse_v_nxv2f16_nxv2f16(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vlse.nxv2f16(
+    <vscale x 2 x half> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vlse.mask.nxv2f16(
+  <vscale x 2 x half>,
+  ptr,
+  iXLen,
+  <vscale x 2 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x half> @intrinsic_vlse_mask_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv2f16_nxv2f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, mu
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x half> @llvm.riscv.vlse.mask.nxv2f16(
+    <vscale x 2 x half> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 2 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vlse.nxv4f16(
+  <vscale x 4 x half>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x half> @intrinsic_vlse_v_nxv4f16_nxv4f16(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vlse.nxv4f16(
+    <vscale x 4 x half> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vlse.mask.nxv4f16(
+  <vscale x 4 x half>,
+  ptr,
+  iXLen,
+  <vscale x 4 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x half> @intrinsic_vlse_mask_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv4f16_nxv4f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, mu
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x half> @llvm.riscv.vlse.mask.nxv4f16(
+    <vscale x 4 x half> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 4 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vlse.nxv8f16(
+  <vscale x 8 x half>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x half> @intrinsic_vlse_v_nxv8f16_nxv8f16(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vlse.nxv8f16(
+    <vscale x 8 x half> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vlse.mask.nxv8f16(
+  <vscale x 8 x half>,
+  ptr,
+  iXLen,
+  <vscale x 8 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x half> @intrinsic_vlse_mask_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv8f16_nxv8f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m2, ta, mu
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x half> @llvm.riscv.vlse.mask.nxv8f16(
+    <vscale x 8 x half> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 8 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vlse.nxv16f16(
+  <vscale x 16 x half>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x half> @intrinsic_vlse_v_nxv16f16_nxv16f16(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vlse.nxv16f16(
+    <vscale x 16 x half> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vlse.mask.nxv16f16(
+  <vscale x 16 x half>,
+  ptr,
+  iXLen,
+  <vscale x 16 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x half> @intrinsic_vlse_mask_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv16f16_nxv16f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, mu
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x half> @llvm.riscv.vlse.mask.nxv16f16(
+    <vscale x 16 x half> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 16 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vlse.nxv32f16(
+  <vscale x 32 x half>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 32 x half> @intrinsic_vlse_v_nxv32f16_nxv32f16(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vlse.nxv32f16(
+    <vscale x 32 x half> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vlse.mask.nxv32f16(
+  <vscale x 32 x half>,
+  ptr,
+  iXLen,
+  <vscale x 32 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 32 x half> @intrinsic_vlse_mask_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, ptr %1, iXLen %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv32f16_nxv32f16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, mu
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x half> @llvm.riscv.vlse.mask.nxv32f16(
+    <vscale x 32 x half> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 32 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vlse.nxv1i8(
+  <vscale x 1 x i8>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vlse_v_nxv1i8_nxv1i8(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv1i8_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vlse.nxv1i8(
+    <vscale x 1 x i8> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vlse.mask.nxv1i8(
+  <vscale x 1 x i8>,
+  ptr,
+  iXLen,
+  <vscale x 1 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vlse_mask_v_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, ptr %1, iXLen %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv1i8_nxv1i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, mu
+; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vlse.mask.nxv1i8(
+    <vscale x 1 x i8> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 1 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vlse.nxv2i8(
+  <vscale x 2 x i8>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vlse_v_nxv2i8_nxv2i8(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv2i8_nxv2i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, mf4, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i8> @llvm.riscv.vlse.nxv2i8(
+    <vscale x 2 x i8> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vlse.mask.nxv2i8(
+  <vscale x 2 x i8>,
+  ptr,
+  iXLen,
+  <vscale x 2 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vlse_mask_v_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, ptr %1, iXLen %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv2i8_nxv2i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, mf4, ta, mu
+; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i8> @llvm.riscv.vlse.mask.nxv2i8(
+    <vscale x 2 x i8> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 2 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vlse.nxv4i8(
+  <vscale x 4 x i8>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vlse_v_nxv4i8_nxv4i8(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv4i8_nxv4i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i8> @llvm.riscv.vlse.nxv4i8(
+    <vscale x 4 x i8> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vlse.mask.nxv4i8(
+  <vscale x 4 x i8>,
+  ptr,
+  iXLen,
+  <vscale x 4 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vlse_mask_v_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, ptr %1, iXLen %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv4i8_nxv4i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, mu
+; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i8> @llvm.riscv.vlse.mask.nxv4i8(
+    <vscale x 4 x i8> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 4 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vlse.nxv8i8(
+  <vscale x 8 x i8>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vlse_v_nxv8i8_nxv8i8(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv8i8_nxv8i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i8> @llvm.riscv.vlse.nxv8i8(
+    <vscale x 8 x i8> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vlse.mask.nxv8i8(
+  <vscale x 8 x i8>,
+  ptr,
+  iXLen,
+  <vscale x 8 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vlse_mask_v_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, ptr %1, iXLen %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv8i8_nxv8i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
+; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i8> @llvm.riscv.vlse.mask.nxv8i8(
+    <vscale x 8 x i8> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 8 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vlse.nxv16i8(
+  <vscale x 16 x i8>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vlse_v_nxv16i8_nxv16i8(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv16i8_nxv16i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x i8> @llvm.riscv.vlse.nxv16i8(
+    <vscale x 16 x i8> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vlse.mask.nxv16i8(
+  <vscale x 16 x i8>,
+  ptr,
+  iXLen,
+  <vscale x 16 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vlse_mask_v_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, ptr %1, iXLen %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv16i8_nxv16i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
+; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x i8> @llvm.riscv.vlse.mask.nxv16i8(
+    <vscale x 16 x i8> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 16 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vlse.nxv32i8(
+  <vscale x 32 x i8>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vlse_v_nxv32i8_nxv32i8(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv32i8_nxv32i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x i8> @llvm.riscv.vlse.nxv32i8(
+    <vscale x 32 x i8> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vlse.mask.nxv32i8(
+  <vscale x 32 x i8>,
+  ptr,
+  iXLen,
+  <vscale x 32 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vlse_mask_v_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, ptr %1, iXLen %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv32i8_nxv32i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, mu
+; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x i8> @llvm.riscv.vlse.mask.nxv32i8(
+    <vscale x 32 x i8> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 32 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 64 x i8> @llvm.riscv.vlse.nxv64i8(
+  <vscale x 64 x i8>,
+  ptr,
+  iXLen,
+  iXLen);
+
+define <vscale x 64 x i8> @intrinsic_vlse_v_nxv64i8_nxv64i8(ptr %0, iXLen %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vlse_v_nxv64i8_nxv64i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a0), a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 64 x i8> @llvm.riscv.vlse.nxv64i8(
+    <vscale x 64 x i8> poison,
+    ptr %0,
+    iXLen %1,
+    iXLen %2)
+
+  ret <vscale x 64 x i8> %a
+}
+
+declare <vscale x 64 x i8> @llvm.riscv.vlse.mask.nxv64i8(
+  <vscale x 64 x i8>,
+  ptr,
+  iXLen,
+  <vscale x 64 x i1>,
+  iXLen,
+  iXLen);
+
+define <vscale x 64 x i8> @intrinsic_vlse_mask_v_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, ptr %1, iXLen %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vlse_mask_v_nxv64i8_nxv64i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, mu
+; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 64 x i8> @llvm.riscv.vlse.mask.nxv64i8(
+    <vscale x 64 x i8> %0,
+    ptr %1,
+    iXLen %2,
+    <vscale x 64 x i1> %3,
+    iXLen %4, iXLen 1)
+
+  ret <vscale x 64 x i8> %a
+}
diff --git a/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll b/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll
index faa119c..5f0682a 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
 
 define void @undef_2phi(ptr%buf) {
 ; CHECK-LABEL: @undef_2phi(
diff --git a/llvm/test/CodeGen/X86/AMX/amx-combine.ll b/llvm/test/CodeGen/X86/AMX/amx-combine.ll
index 07f489c..72e072dd 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-combine.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-combine.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
 
 define void @combine_store(ptr%p) {
 ; CHECK-LABEL: @combine_store(
diff --git a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll
index 6c536f1..4ac406c 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -lower-amx-type -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -x86-lower-amx-type -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -passes=x86-lower-amx-type -S | FileCheck %s
 
 @buf = dso_local global [1024 x i8] zeroinitializer, align 16
 @buf2 = dso_local global [1024 x i8] zeroinitializer, align 16
diff --git a/llvm/test/CodeGen/X86/AMX/amx-type.ll b/llvm/test/CodeGen/X86/AMX/amx-type.ll
index 1d9af2b..294195a 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-type.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-type.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
 
 %struct.__tile_str = type { i16, i16, <256 x i32> }
 
diff --git a/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll b/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll
index b70668f..cdce783 100644
--- a/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll
+++ b/llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
 
 define void @combine_amx_cast_inside_bb() {
 ; CHECK-LABEL: @combine_amx_cast_inside_bb(
diff --git a/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll b/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll
index 3a5b4245..0b419bb 100644
--- a/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll
+++ b/llvm/test/CodeGen/X86/AMX/lat-transform-amx-bitcast.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
 
 %struct.__tile_str = type { i16, i16, <256 x i32> }
 
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll
index 52641c6..3549875 100644
--- a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll
+++ b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O0.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-  ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+  ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+  ; RUN: opt --codegen-opt-level=0 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
 
   @buf = dso_local global [2048 x i8] zeroinitializer, align 16
 
diff --git a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll
index 346d46b..96966264 100644
--- a/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll
+++ b/llvm/test/CodeGen/X86/amx_tile_pair_lower_type_O2.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
+; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
 
   @buf = dso_local global [2048 x i8] zeroinitializer, align 16
   @buf2 = dso_local global [2048 x i8] zeroinitializer, align 16
diff --git a/llvm/test/CodeGen/X86/trunc-srl-load.ll b/llvm/test/CodeGen/X86/trunc-srl-load.ll
new file mode 100644
index 0000000..4dae143
--- /dev/null
+++ b/llvm/test/CodeGen/X86/trunc-srl-load.ll
@@ -0,0 +1,1672 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown                   | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64    | FileCheck %s --check-prefixes=X64,SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
+
+; Tests showing for the analysis of non-constant shift amounts to improve load address math
+
+; Alignment of shift amounts should allow sub-integer loads.
+
+define i16 @extractSub64_16(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub64_16:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl (%eax), %edx
+; X86-NEXT:    movl 4(%eax), %esi
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    andb $16, %cl
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    shrdl %cl, %esi, %edx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    jne .LBB0_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:  .LBB0_2:
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; SSE-LABEL: extractSub64_16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movl %esi, %ecx
+; SSE-NEXT:    movq (%rdi), %rax
+; SSE-NEXT:    andb $48, %cl
+; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT:    shrq %cl, %rax
+; SSE-NEXT:    # kill: def $ax killed $ax killed $rax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: extractSub64_16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
+; AVX-NEXT:    andb $48, %sil
+; AVX-NEXT:    shrxq %rsi, (%rdi), %rax
+; AVX-NEXT:    # kill: def $ax killed $ax killed $rax
+; AVX-NEXT:    retq
+  %idx_bounds = and i32 %idx, 63
+  %idx_align = and i32 %idx_bounds, -16
+  %sh = zext nneg i32 %idx_align to i64
+  %ld = load i64, ptr %word, align 8
+  %sub = lshr i64 %ld, %sh
+  %res = trunc i64 %sub to i16
+  ret i16 %res
+}
+
+define i16 @extractSub128_16(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub128_16:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $32, %esp
+; X86-NEXT:    movzbl 12(%ebp), %eax
+; X86-NEXT:    movl 8(%ebp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl 4(%ecx), %esi
+; X86-NEXT:    movl 8(%ecx), %edi
+; X86-NEXT:    movl 12(%ecx), %ecx
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edx, (%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    andb $16, %cl
+; X86-NEXT:    shrb $3, %al
+; X86-NEXT:    andb $12, %al
+; X86-NEXT:    movzbl %al, %edx
+; X86-NEXT:    movl (%esp,%edx), %eax
+; X86-NEXT:    movl 4(%esp,%edx), %edx
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    leal -8(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; SSE-LABEL: extractSub128_16:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movq (%rdi), %rax
+; SSE-NEXT:    movq 8(%rdi), %rdx
+; SSE-NEXT:    movl %esi, %ecx
+; SSE-NEXT:    andb $48, %cl
+; SSE-NEXT:    movq %rdx, %rdi
+; SSE-NEXT:    shrq %cl, %rdi
+; SSE-NEXT:    shrdq %cl, %rdx, %rax
+; SSE-NEXT:    testb $64, %sil
+; SSE-NEXT:    cmovneq %rdi, %rax
+; SSE-NEXT:    # kill: def $ax killed $ax killed $rax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: extractSub128_16:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movq (%rdi), %rdx
+; AVX-NEXT:    movq 8(%rdi), %rax
+; AVX-NEXT:    movl %esi, %ecx
+; AVX-NEXT:    andb $48, %cl
+; AVX-NEXT:    shrdq %cl, %rax, %rdx
+; AVX-NEXT:    shrxq %rcx, %rax, %rax
+; AVX-NEXT:    testb $64, %sil
+; AVX-NEXT:    cmoveq %rdx, %rax
+; AVX-NEXT:    # kill: def $ax killed $ax killed $rax
+; AVX-NEXT:    retq
+  %idx_bounds = and i32 %idx, 127
+  %idx_align = and i32 %idx_bounds, -16
+  %sh = zext nneg i32 %idx_align to i128
+  %ld = load i128, ptr %word, align 8
+  %sub = lshr i128 %ld, %sh
+  %res = trunc i128 %sub to i16
+  ret i16 %res
+}
+
+define i32 @extractSub128_32(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub128_32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $32, %esp
+; X86-NEXT:    movzbl 12(%ebp), %eax
+; X86-NEXT:    movl 8(%ebp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl 4(%ecx), %esi
+; X86-NEXT:    movl 8(%ecx), %edi
+; X86-NEXT:    movl 12(%ecx), %ecx
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edx, (%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    andb $96, %al
+; X86-NEXT:    shrb $3, %al
+; X86-NEXT:    movzbl %al, %eax
+; X86-NEXT:    movl (%esp,%eax), %eax
+; X86-NEXT:    leal -8(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; SSE-LABEL: extractSub128_32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movq (%rdi), %rax
+; SSE-NEXT:    movq 8(%rdi), %rdx
+; SSE-NEXT:    movl %esi, %ecx
+; SSE-NEXT:    andb $32, %cl
+; SSE-NEXT:    movq %rdx, %rdi
+; SSE-NEXT:    shrq %cl, %rdi
+; SSE-NEXT:    shrdq %cl, %rdx, %rax
+; SSE-NEXT:    testb $64, %sil
+; SSE-NEXT:    cmovneq %rdi, %rax
+; SSE-NEXT:    # kill: def $eax killed $eax killed $rax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: extractSub128_32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    movq (%rdi), %rdx
+; AVX-NEXT:    movq 8(%rdi), %rax
+; AVX-NEXT:    movl %esi, %ecx
+; AVX-NEXT:    andb $32, %cl
+; AVX-NEXT:    shrdq %cl, %rax, %rdx
+; AVX-NEXT:    shrxq %rcx, %rax, %rax
+; AVX-NEXT:    testb $64, %sil
+; AVX-NEXT:    cmoveq %rdx, %rax
+; AVX-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX-NEXT:    retq
+  %idx_bounds = and i32 %idx, 127
+  %idx_align = and i32 %idx_bounds, -32
+  %sh = zext nneg i32 %idx_align to i128
+  %ld = load i128, ptr %word, align 8
+  %sub = lshr i128 %ld, %sh
+  %res = trunc i128 %sub to i32
+  ret i32 %res
+}
+
+define i64 @extractSub128_64(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub128_64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $32, %esp
+; X86-NEXT:    movzbl 12(%ebp), %eax
+; X86-NEXT:    movl 8(%ebp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl 4(%ecx), %esi
+; X86-NEXT:    movl 8(%ecx), %edi
+; X86-NEXT:    movl 12(%ecx), %ecx
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edx, (%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    andb $64, %al
+; X86-NEXT:    shrb $3, %al
+; X86-NEXT:    movzbl %al, %ecx
+; X86-NEXT:    movl (%esp,%ecx), %eax
+; X86-NEXT:    movl 4(%esp,%ecx), %edx
+; X86-NEXT:    leal -8(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X64-LABEL: extractSub128_64:
+; X64:       # %bb.0:
+; X64-NEXT:    testb $64, %sil
+; X64-NEXT:    je .LBB3_1
+; X64-NEXT:  # %bb.2:
+; X64-NEXT:    movq 8(%rdi), %rax
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB3_1:
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    retq
+  %idx_bounds = and i32 %idx, 127
+  %idx_align = and i32 %idx_bounds, -64
+  %sh = zext nneg i32 %idx_align to i128
+  %ld = load i128, ptr %word, align 8
+  %sub = lshr i128 %ld, %sh
+  %res = trunc i128 %sub to i64
+  ret i64 %res
+}
+
+define i8 @extractSub512_8(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub512_8:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $192, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl (%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 4(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 8(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 12(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 16(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 20(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 24(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 28(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 32(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 36(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 40(%eax), %ebx
+; X86-NEXT:    movl 44(%eax), %edi
+; X86-NEXT:    movl 48(%eax), %esi
+; X86-NEXT:    movl 52(%eax), %edx
+; X86-NEXT:    movl 56(%eax), %ecx
+; X86-NEXT:    movl 60(%eax), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 12(%ebp), %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    andl $24, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    shrl $3, %edx
+; X86-NEXT:    andl $60, %edx
+; X86-NEXT:    movl 48(%esp,%edx), %eax
+; X86-NEXT:    movl 52(%esp,%edx), %edx
+; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; SSE-LABEL: extractSub512_8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pushq %rax
+; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
+; SSE-NEXT:    movups (%rdi), %xmm0
+; SSE-NEXT:    movups 16(%rdi), %xmm1
+; SSE-NEXT:    movups 32(%rdi), %xmm2
+; SSE-NEXT:    movups 48(%rdi), %xmm3
+; SSE-NEXT:    xorps %xmm4, %xmm4
+; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm3, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movl %esi, %ecx
+; SSE-NEXT:    andl $56, %ecx
+; SSE-NEXT:    shrl $3, %esi
+; SSE-NEXT:    andl $56, %esi
+; SSE-NEXT:    movq -128(%rsp,%rsi), %rdx
+; SSE-NEXT:    shrq %cl, %rdx
+; SSE-NEXT:    movl -120(%rsp,%rsi), %eax
+; SSE-NEXT:    addl %eax, %eax
+; SSE-NEXT:    notl %ecx
+; SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
+; SSE-NEXT:    shlq %cl, %rax
+; SSE-NEXT:    orl %edx, %eax
+; SSE-NEXT:    # kill: def $al killed $al killed $rax
+; SSE-NEXT:    popq %rcx
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: extractSub512_8:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    pushq %rax
+; AVX2-NEXT:    # kill: def $esi killed $esi def $rsi
+; AVX2-NEXT:    vmovups (%rdi), %ymm0
+; AVX2-NEXT:    vmovups 32(%rdi), %ymm1
+; AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    movl %esi, %ecx
+; AVX2-NEXT:    andl $56, %ecx
+; AVX2-NEXT:    shrl $3, %esi
+; AVX2-NEXT:    andl $56, %esi
+; AVX2-NEXT:    shrxq %rcx, -128(%rsp,%rsi), %rax
+; AVX2-NEXT:    # kill: def $ecx killed $ecx killed $rcx def $rcx
+; AVX2-NEXT:    notl %ecx
+; AVX2-NEXT:    movl -120(%rsp,%rsi), %edx
+; AVX2-NEXT:    addl %edx, %edx
+; AVX2-NEXT:    shlxq %rcx, %rdx, %rcx
+; AVX2-NEXT:    orl %ecx, %eax
+; AVX2-NEXT:    # kill: def $al killed $al killed $rax
+; AVX2-NEXT:    popq %rcx
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: extractSub512_8:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    pushq %rax
+; AVX512-NEXT:    vmovups (%rdi), %ymm0
+; AVX512-NEXT:    vmovups 32(%rdi), %ymm1
+; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    # kill: def $esi killed $esi def $rsi
+; AVX512-NEXT:    movl %esi, %ecx
+; AVX512-NEXT:    andl $56, %ecx
+; AVX512-NEXT:    shrl $3, %esi
+; AVX512-NEXT:    andl $56, %esi
+; AVX512-NEXT:    shrxq %rcx, -128(%rsp,%rsi), %rax
+; AVX512-NEXT:    # kill: def $ecx killed $ecx killed $rcx def $rcx
+; AVX512-NEXT:    notl %ecx
+; AVX512-NEXT:    movl -120(%rsp,%rsi), %edx
+; AVX512-NEXT:    addl %edx, %edx
+; AVX512-NEXT:    shlxq %rcx, %rdx, %rcx
+; AVX512-NEXT:    orl %ecx, %eax
+; AVX512-NEXT:    # kill: def $al killed $al killed $rax
+; AVX512-NEXT:    popq %rcx
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  %idx_bounds = and i32 %idx, 511
+  %idx_align = and i32 %idx_bounds, -8
+  %ld = load i512, ptr %word, align 8
+  %sh = zext nneg i32 %idx_align to i512
+  %sub = lshr i512 %ld, %sh
+  %res = trunc i512 %sub to i8
+  ret i8 %res
+}
+
+define i64 @extractSub512_64(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub512_64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $192, %esp
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl (%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 4(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 8(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 12(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 16(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 20(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 24(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 28(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 32(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 36(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 40(%eax), %ebx
+; X86-NEXT:    movl 44(%eax), %edi
+; X86-NEXT:    movl 48(%eax), %esi
+; X86-NEXT:    movl 52(%eax), %edx
+; X86-NEXT:    movl 56(%eax), %ecx
+; X86-NEXT:    movl 60(%eax), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 12(%ebp), %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    shrl $3, %ecx
+; X86-NEXT:    andl $56, %ecx
+; X86-NEXT:    movl 48(%esp,%ecx), %eax
+; X86-NEXT:    movl 52(%esp,%ecx), %edx
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; SSE-LABEL: extractSub512_64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pushq %rax
+; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
+; SSE-NEXT:    movups (%rdi), %xmm0
+; SSE-NEXT:    movups 16(%rdi), %xmm1
+; SSE-NEXT:    movups 32(%rdi), %xmm2
+; SSE-NEXT:    movups 48(%rdi), %xmm3
+; SSE-NEXT:    xorps %xmm4, %xmm4
+; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm3, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    shrl $3, %esi
+; SSE-NEXT:    andl $56, %esi
+; SSE-NEXT:    movq -128(%rsp,%rsi), %rax
+; SSE-NEXT:    popq %rcx
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: extractSub512_64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    pushq %rax
+; AVX2-NEXT:    vmovups (%rdi), %ymm0
+; AVX2-NEXT:    vmovups 32(%rdi), %ymm1
+; AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    # kill: def $esi killed $esi def $rsi
+; AVX2-NEXT:    shrl $3, %esi
+; AVX2-NEXT:    andl $56, %esi
+; AVX2-NEXT:    movq -128(%rsp,%rsi), %rax
+; AVX2-NEXT:    popq %rcx
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: extractSub512_64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    pushq %rax
+; AVX512-NEXT:    vmovups (%rdi), %ymm0
+; AVX512-NEXT:    vmovups 32(%rdi), %ymm1
+; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    # kill: def $esi killed $esi def $rsi
+; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    shrl $3, %esi
+; AVX512-NEXT:    andl $56, %esi
+; AVX512-NEXT:    movq -128(%rsp,%rsi), %rax
+; AVX512-NEXT:    popq %rcx
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  %idx_bounds = and i32 %idx, 511
+  %idx_align = and i32 %idx_bounds, -64
+  %sh = zext nneg i32 %idx_align to i512
+  %ld = load i512, ptr %word, align 8
+  %sub = lshr i512 %ld, %sh
+  %res = trunc i512 %sub to i64
+  ret i64 %res
+}
+
+define i128 @extractSub512_128(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub512_128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $192, %esp
+; X86-NEXT:    movl 12(%ebp), %eax
+; X86-NEXT:    movl (%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 4(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 8(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 12(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 16(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 20(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 24(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 28(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 32(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 36(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 40(%eax), %ebx
+; X86-NEXT:    movl 44(%eax), %edi
+; X86-NEXT:    movl 48(%eax), %esi
+; X86-NEXT:    movl 52(%eax), %edx
+; X86-NEXT:    movl 56(%eax), %ecx
+; X86-NEXT:    movl 60(%eax), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 16(%ebp), %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    shrl $3, %edi
+; X86-NEXT:    andl $48, %edi
+; X86-NEXT:    movl 48(%esp,%edi), %ecx
+; X86-NEXT:    movl 52(%esp,%edi), %edx
+; X86-NEXT:    movl 56(%esp,%edi), %esi
+; X86-NEXT:    movl 60(%esp,%edi), %edi
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    movl %esi, 8(%eax)
+; X86-NEXT:    movl %edx, 4(%eax)
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; SSE-LABEL: extractSub512_128:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pushq %rax
+; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
+; SSE-NEXT:    movups (%rdi), %xmm0
+; SSE-NEXT:    movups 16(%rdi), %xmm1
+; SSE-NEXT:    movups 32(%rdi), %xmm2
+; SSE-NEXT:    movups 48(%rdi), %xmm3
+; SSE-NEXT:    xorps %xmm4, %xmm4
+; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm4, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm3, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    shrl $3, %esi
+; SSE-NEXT:    andl $48, %esi
+; SSE-NEXT:    movq -128(%rsp,%rsi), %rax
+; SSE-NEXT:    movq -120(%rsp,%rsi), %rdx
+; SSE-NEXT:    popq %rcx
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: extractSub512_128:
+; AVX:       # %bb.0:
+; AVX-NEXT:    pushq %rax
+; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
+; AVX-NEXT:    vmovups (%rdi), %ymm0
+; AVX-NEXT:    vmovups 32(%rdi), %ymm1
+; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    shrl $3, %esi
+; AVX-NEXT:    andl $48, %esi
+; AVX-NEXT:    movq -128(%rsp,%rsi), %rax
+; AVX-NEXT:    movq -120(%rsp,%rsi), %rdx
+; AVX-NEXT:    popq %rcx
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %idx_bounds = and i32 %idx, 511
+  %idx_align = and i32 %idx_bounds, -128
+  %sh = zext nneg i32 %idx_align to i512
+  %ld = load i512, ptr %word, align 8
+  %sub = lshr i512 %ld, %sh
+  %res = trunc i512 %sub to i128
+  ret i128 %res
+}
+
+define i64 @extractSub4096_64(ptr %word, i32 %idx) nounwind {
+; X86-LABEL: extractSub4096_64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $1536, %esp # imm = 0x600
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl 4(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 8(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 12(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 16(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 20(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 24(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 28(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 32(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 36(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 40(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 44(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 48(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 52(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 56(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 60(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 64(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 68(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 72(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 76(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 80(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 84(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 88(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 92(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 96(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 100(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 104(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 108(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 112(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 116(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 120(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 124(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 128(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 132(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 136(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 140(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 144(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 148(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 152(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 156(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 160(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 164(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 168(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 172(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 176(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 180(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 184(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 188(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 192(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 196(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 200(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 204(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 208(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 212(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 216(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 220(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 224(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 228(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 232(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 236(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 240(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 244(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 248(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 252(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 256(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 260(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 264(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 268(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 272(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 276(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 280(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 284(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 288(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 292(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 296(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 300(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 304(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 308(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 312(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 316(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 320(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 324(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 328(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 332(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 336(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 340(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 344(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 348(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 352(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 356(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 360(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 364(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 368(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 372(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 376(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 380(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl (%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 384(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 388(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 392(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 396(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 400(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 404(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 408(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 412(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 416(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 420(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 424(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 428(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 432(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 436(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 440(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 444(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 448(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 452(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 456(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 460(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 464(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 468(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 472(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 476(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 480(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 484(%eax), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 488(%eax), %ebx
+; X86-NEXT:    movl 492(%eax), %edi
+; X86-NEXT:    movl 496(%eax), %esi
+; X86-NEXT:    movl 500(%eax), %edx
+; X86-NEXT:    movl 504(%eax), %ecx
+; X86-NEXT:    movl 508(%eax), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $4032, %ecx # imm = 0xFC0
+; X86-NEXT:    andl 12(%ebp), %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    shrl $3, %ecx
+; X86-NEXT:    movl 496(%esp,%ecx), %eax
+; X86-NEXT:    movl 500(%esp,%ecx), %edx
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; SSE-LABEL: extractSub4096_64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    subq $1176, %rsp # imm = 0x498
+; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
+; SSE-NEXT:    movups (%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 16(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 32(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 48(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 64(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 80(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 96(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 112(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 128(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT:    movups 144(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 160(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 176(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 192(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 208(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 224(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 240(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 256(%rdi), %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movups 272(%rdi), %xmm15
+; SSE-NEXT:    movups 288(%rdi), %xmm14
+; SSE-NEXT:    movups 304(%rdi), %xmm13
+; SSE-NEXT:    movups 320(%rdi), %xmm12
+; SSE-NEXT:    movups 336(%rdi), %xmm11
+; SSE-NEXT:    movups 352(%rdi), %xmm10
+; SSE-NEXT:    movups 368(%rdi), %xmm9
+; SSE-NEXT:    movups 384(%rdi), %xmm8
+; SSE-NEXT:    movups 400(%rdi), %xmm7
+; SSE-NEXT:    movups 416(%rdi), %xmm6
+; SSE-NEXT:    movups 432(%rdi), %xmm5
+; SSE-NEXT:    movups 448(%rdi), %xmm4
+; SSE-NEXT:    movups 464(%rdi), %xmm3
+; SSE-NEXT:    movups 480(%rdi), %xmm2
+; SSE-NEXT:    movups 496(%rdi), %xmm1
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm2, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm3, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm4, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm5, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm6, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm7, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm8, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm9, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm10, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm11, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm12, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm13, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm14, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps %xmm15, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; SSE-NEXT:    andl $4032, %esi # imm = 0xFC0
+; SSE-NEXT:    shrl $3, %esi
+; SSE-NEXT:    movq 144(%rsp,%rsi), %rax
+; SSE-NEXT:    addq $1176, %rsp # imm = 0x498
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: extractSub4096_64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    subq $936, %rsp # imm = 0x3A8
+; AVX2-NEXT:    vmovups (%rdi), %ymm0
+; AVX2-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX2-NEXT:    vmovups 32(%rdi), %ymm1
+; AVX2-NEXT:    vmovups 64(%rdi), %ymm2
+; AVX2-NEXT:    vmovups 96(%rdi), %ymm3
+; AVX2-NEXT:    vmovups 128(%rdi), %ymm4
+; AVX2-NEXT:    vmovups 160(%rdi), %ymm5
+; AVX2-NEXT:    vmovups 192(%rdi), %ymm6
+; AVX2-NEXT:    vmovups 224(%rdi), %ymm7
+; AVX2-NEXT:    vmovups 256(%rdi), %ymm8
+; AVX2-NEXT:    vmovups 288(%rdi), %ymm9
+; AVX2-NEXT:    vmovups 320(%rdi), %ymm10
+; AVX2-NEXT:    vmovups 352(%rdi), %ymm11
+; AVX2-NEXT:    vmovups 384(%rdi), %ymm12
+; AVX2-NEXT:    vmovups 416(%rdi), %ymm13
+; AVX2-NEXT:    vmovups 448(%rdi), %ymm14
+; AVX2-NEXT:    vmovups 480(%rdi), %ymm15
+; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm15, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm14, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm13, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm12, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm11, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm10, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm9, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm8, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm7, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm6, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm5, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm4, {{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm3, (%rsp)
+; AVX2-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    # kill: def $esi killed $esi def $rsi
+; AVX2-NEXT:    andl $4032, %esi # imm = 0xFC0
+; AVX2-NEXT:    shrl $3, %esi
+; AVX2-NEXT:    movq -96(%rsp,%rsi), %rax
+; AVX2-NEXT:    addq $936, %rsp # imm = 0x3A8
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: extractSub4096_64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    subq $904, %rsp # imm = 0x388
+; AVX512-NEXT:    # kill: def $esi killed $esi def $rsi
+; AVX512-NEXT:    vmovups (%rdi), %ymm0
+; AVX512-NEXT:    vmovups 32(%rdi), %ymm1
+; AVX512-NEXT:    vmovups 64(%rdi), %ymm2
+; AVX512-NEXT:    vmovups 96(%rdi), %ymm3
+; AVX512-NEXT:    vmovups 128(%rdi), %ymm4
+; AVX512-NEXT:    vmovups 160(%rdi), %ymm5
+; AVX512-NEXT:    vmovups 192(%rdi), %ymm6
+; AVX512-NEXT:    vmovups 224(%rdi), %ymm7
+; AVX512-NEXT:    vmovups 256(%rdi), %ymm8
+; AVX512-NEXT:    vmovups 288(%rdi), %ymm9
+; AVX512-NEXT:    vmovups 320(%rdi), %ymm10
+; AVX512-NEXT:    vmovups 352(%rdi), %ymm11
+; AVX512-NEXT:    vmovups 384(%rdi), %ymm12
+; AVX512-NEXT:    vmovups 416(%rdi), %ymm13
+; AVX512-NEXT:    andl $4032, %esi # imm = 0xFC0
+; AVX512-NEXT:    vmovups 448(%rdi), %ymm14
+; AVX512-NEXT:    vmovups 480(%rdi), %ymm15
+; AVX512-NEXT:    vxorps %xmm16, %xmm16, %xmm16
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm16, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm15, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm14, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm13, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm12, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm11, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm10, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm9, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm8, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm7, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm6, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm5, {{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm4, (%rsp)
+; AVX512-NEXT:    vmovups %ymm3, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm2, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
+; AVX512-NEXT:    shrl $3, %esi
+; AVX512-NEXT:    movq -128(%rsp,%rsi), %rax
+; AVX512-NEXT:    addq $904, %rsp # imm = 0x388
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  %idx_bounds = and i32 %idx, 4095
+  %idx_align = and i32 %idx_bounds, -64
+  %sh = zext nneg i32 %idx_align to i4096
+  %ld = load i4096, ptr %word, align 8
+  %sub = lshr i4096 %ld, %sh
+  %res = trunc i4096 %sub to i64
+  ret i64 %res
+}
diff --git a/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll b/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
index 35b7b04..5affe55 100644
--- a/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
+++ b/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
@@ -60,7 +60,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind optsize readnone "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind optsize readnone "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/ARM/multiple-constant-uses-drops-dbgloc.ll b/llvm/test/DebugInfo/ARM/multiple-constant-uses-drops-dbgloc.ll
index d21f1bf..4f9e67c 100644
--- a/llvm/test/DebugInfo/ARM/multiple-constant-uses-drops-dbgloc.ll
+++ b/llvm/test/DebugInfo/ARM/multiple-constant-uses-drops-dbgloc.ll
@@ -27,7 +27,7 @@ entry:
   ret void, !dbg !19
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+neon,+vfp3" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!8, !9, !10, !11}
diff --git a/llvm/test/DebugInfo/BPF/extern-void.ll b/llvm/test/DebugInfo/BPF/extern-void.ll
index 8fc8089..fb6700d1 100644
--- a/llvm/test/DebugInfo/BPF/extern-void.ll
+++ b/llvm/test/DebugInfo/BPF/extern-void.ll
@@ -42,7 +42,7 @@ entry:
 ; Function Attrs: nounwind readnone speculatable willreturn
 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
-attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable willreturn }
 
 !llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/COFF/array-odr-violation.ll b/llvm/test/DebugInfo/COFF/array-odr-violation.ll
index 0283515..f24c7d7 100644
--- a/llvm/test/DebugInfo/COFF/array-odr-violation.ll
+++ b/llvm/test/DebugInfo/COFF/array-odr-violation.ll
@@ -57,7 +57,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { noinline nounwind sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!2, !11}
diff --git a/llvm/test/DebugInfo/COFF/asan-module-ctor.ll b/llvm/test/DebugInfo/COFF/asan-module-ctor.ll
index 49c30e9..bc991a4 100644
--- a/llvm/test/DebugInfo/COFF/asan-module-ctor.ll
+++ b/llvm/test/DebugInfo/COFF/asan-module-ctor.ll
@@ -76,7 +76,7 @@ declare void @__asan_register_globals(i32, i32)
 
 declare void @__asan_unregister_globals(i32, i32)
 
-attributes #0 = { nounwind sanitize_address "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind sanitize_address "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!7, !8}
diff --git a/llvm/test/DebugInfo/COFF/asm.ll b/llvm/test/DebugInfo/COFF/asm.ll
index cf440bd..424ea96 100644
--- a/llvm/test/DebugInfo/COFF/asm.ll
+++ b/llvm/test/DebugInfo/COFF/asm.ll
@@ -145,8 +145,8 @@ entry:
 
 declare void @g() #1
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/class-options-common.ll b/llvm/test/DebugInfo/COFF/class-options-common.ll
index 70071461..014c86c 100644
--- a/llvm/test/DebugInfo/COFF/class-options-common.ll
+++ b/llvm/test/DebugInfo/COFF/class-options-common.ll
@@ -840,7 +840,7 @@ entry:
   ret void, !dbg !173
 }
 
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 
 !llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/COFF/comdat.ll b/llvm/test/DebugInfo/COFF/comdat.ll
index 841630c..8c97afb 100644
--- a/llvm/test/DebugInfo/COFF/comdat.ll
+++ b/llvm/test/DebugInfo/COFF/comdat.ll
@@ -129,11 +129,11 @@ declare i32 @__C_specific_handler(...)
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #5
 
-attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { inlinehint noinline nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #2 = { inlinehint noinline nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #4 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #5 = { nounwind readnone }
 attributes #6 = { noinline }
 attributes #7 = { nounwind }
diff --git a/llvm/test/DebugInfo/COFF/cpp-mangling.ll b/llvm/test/DebugInfo/COFF/cpp-mangling.ll
index 994f575..257c97a 100644
--- a/llvm/test/DebugInfo/COFF/cpp-mangling.ll
+++ b/llvm/test/DebugInfo/COFF/cpp-mangling.ll
@@ -72,7 +72,7 @@ entry:
   ret void, !dbg !32
 }
 
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/defer-complete-type.ll b/llvm/test/DebugInfo/COFF/defer-complete-type.ll
index 012b234..6086092 100644
--- a/llvm/test/DebugInfo/COFF/defer-complete-type.ll
+++ b/llvm/test/DebugInfo/COFF/defer-complete-type.ll
@@ -172,7 +172,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/enum-co.ll b/llvm/test/DebugInfo/COFF/enum-co.ll
index b5ce797..a334b35 100644
--- a/llvm/test/DebugInfo/COFF/enum-co.ll
+++ b/llvm/test/DebugInfo/COFF/enum-co.ll
@@ -125,7 +125,7 @@ entry:
   ret void, !dbg !46
 }
 
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/fpo-argsize.ll b/llvm/test/DebugInfo/COFF/fpo-argsize.ll
index 82eedd9..e4af66f 100644
--- a/llvm/test/DebugInfo/COFF/fpo-argsize.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-argsize.ll
@@ -285,7 +285,7 @@ entry:
   ret void, !dbg !159
 }
 
-attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/fpo-csrs.ll b/llvm/test/DebugInfo/COFF/fpo-csrs.ll
index c5b1dc9..8e99e1f 100644
--- a/llvm/test/DebugInfo/COFF/fpo-csrs.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-csrs.ll
@@ -581,8 +581,8 @@ entry:
 ; Function Attrs: nounwind readnone speculatable
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone speculatable }
 attributes #3 = { nounwind }
 
diff --git a/llvm/test/DebugInfo/COFF/fpo-funclet.ll b/llvm/test/DebugInfo/COFF/fpo-funclet.ll
index 302c4a2..260bdd2 100644
--- a/llvm/test/DebugInfo/COFF/fpo-funclet.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-funclet.ll
@@ -58,8 +58,8 @@ declare void @"\01?g@@YAXXZ"() local_unnamed_addr #1
 
 declare i32 @__CxxFrameHandler3(...)
 
-attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5, !6}
diff --git a/llvm/test/DebugInfo/COFF/fpo-realign-alloca.ll b/llvm/test/DebugInfo/COFF/fpo-realign-alloca.ll
index 5bd19a0..d6f45b1 100644
--- a/llvm/test/DebugInfo/COFF/fpo-realign-alloca.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-realign-alloca.ll
@@ -76,9 +76,9 @@ declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1
 ; Function Attrs: nounwind readnone speculatable
 declare void @llvm.dbg.value(metadata, metadata, metadata) #3
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind }
-attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #3 = { nounwind readnone speculatable }
 attributes #4 = { nounwind }
 
diff --git a/llvm/test/DebugInfo/COFF/fpo-realign-vframe.ll b/llvm/test/DebugInfo/COFF/fpo-realign-vframe.ll
index 5f1d6a5..15626a8 100644
--- a/llvm/test/DebugInfo/COFF/fpo-realign-vframe.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-realign-vframe.ll
@@ -191,10 +191,10 @@ declare dso_local void @usevals(ptr, ptr, ptr) local_unnamed_addr #3
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #2
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { argmemonly nounwind }
-attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #4 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
index 9c6fb8a..4b76ec6 100644
--- a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
@@ -126,8 +126,8 @@ declare i32 @doSomething(ptr) local_unnamed_addr #1
 ; Function Attrs: nounwind readnone speculatable
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone speculatable }
 attributes #3 = { nounwind }
 
diff --git a/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll b/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll
index d0d7249..9fcb591 100644
--- a/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-stack-protect.ll
@@ -74,10 +74,10 @@ declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #2
 ; Function Attrs: nounwind readnone speculatable
 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { argmemonly nounwind }
-attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #4 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/frameproc-flags.ll b/llvm/test/DebugInfo/COFF/frameproc-flags.ll
index 9054189..774e586 100644
--- a/llvm/test/DebugInfo/COFF/frameproc-flags.ll
+++ b/llvm/test/DebugInfo/COFF/frameproc-flags.ll
@@ -272,18 +272,18 @@ declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1) #2
 ; Function Attrs: nounwind readnone speculatable
 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
-attributes #0 = { sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { sspstrong "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { argmemonly nounwind }
-attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #4 = { returns_twice }
-attributes #5 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #6 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #7 = { inlinehint nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #8 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #5 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #6 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #7 = { inlinehint nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #8 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #9 = { nounwind readnone }
 attributes #10 = { nounwind }
-attributes #11 = { naked noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #11 = { naked noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #12 = { noinline }
 
 !llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/COFF/function-options.ll b/llvm/test/DebugInfo/COFF/function-options.ll
index c54c4d8..1cac141 100644
--- a/llvm/test/DebugInfo/COFF/function-options.ll
+++ b/llvm/test/DebugInfo/COFF/function-options.ll
@@ -625,9 +625,9 @@ entry:
   ret i32 0, !dbg !118
 }
 
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable willreturn }
-attributes #2 = { noinline norecurse nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noinline norecurse nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5, !6}
diff --git a/llvm/test/DebugInfo/COFF/global-constants.ll b/llvm/test/DebugInfo/COFF/global-constants.ll
index c0e6afb..28fe071 100644
--- a/llvm/test/DebugInfo/COFF/global-constants.ll
+++ b/llvm/test/DebugInfo/COFF/global-constants.ll
@@ -91,8 +91,8 @@ entry:
 
 declare dso_local void @"?useConst@@YAXH@Z"(i32) #1
 
-attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!26, !27, !28, !29}
diff --git a/llvm/test/DebugInfo/COFF/global_visibility.ll b/llvm/test/DebugInfo/COFF/global_visibility.ll
index 8da374d..499690a85 100644
--- a/llvm/test/DebugInfo/COFF/global_visibility.ll
+++ b/llvm/test/DebugInfo/COFF/global_visibility.ll
@@ -195,8 +195,8 @@ entry:
 ; Function Attrs: nounwind readnone speculatable willreturn
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
 
-attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone speculatable willreturn }
 
 !llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/COFF/globals.ll b/llvm/test/DebugInfo/COFF/globals.ll
index 2724b99..9df4435 100644
--- a/llvm/test/DebugInfo/COFF/globals.ll
+++ b/llvm/test/DebugInfo/COFF/globals.ll
@@ -402,7 +402,7 @@ entry:
   ret ptr %this1, !dbg !83
 }
 
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable willreturn }
 attributes #2 = { nounwind }
 
diff --git a/llvm/test/DebugInfo/COFF/inheritance.ll b/llvm/test/DebugInfo/COFF/inheritance.ll
index e31d6ef..ae2f7f7 100644
--- a/llvm/test/DebugInfo/COFF/inheritance.ll
+++ b/llvm/test/DebugInfo/COFF/inheritance.ll
@@ -123,8 +123,8 @@ entry:
 ; Function Attrs: nounwind readnone speculatable
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone speculatable }
 
 !llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/COFF/inlining-files.ll b/llvm/test/DebugInfo/COFF/inlining-files.ll
index 37edc6b..febd2a0 100644
--- a/llvm/test/DebugInfo/COFF/inlining-files.ll
+++ b/llvm/test/DebugInfo/COFF/inlining-files.ll
@@ -77,7 +77,7 @@ entry:
   ret void, !dbg !30
 }
 
-attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!8, !9, !10}
diff --git a/llvm/test/DebugInfo/COFF/inlining-header.ll b/llvm/test/DebugInfo/COFF/inlining-header.ll
index 9a8200c..8580faa 100644
--- a/llvm/test/DebugInfo/COFF/inlining-header.ll
+++ b/llvm/test/DebugInfo/COFF/inlining-header.ll
@@ -137,7 +137,7 @@ entry:
   ret i32 %5, !dbg !32
 }
 
-attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!8, !9, !10}
diff --git a/llvm/test/DebugInfo/COFF/inlining-levels.ll b/llvm/test/DebugInfo/COFF/inlining-levels.ll
index 8af1251..473b2df 100644
--- a/llvm/test/DebugInfo/COFF/inlining-levels.ll
+++ b/llvm/test/DebugInfo/COFF/inlining-levels.ll
@@ -59,7 +59,7 @@ entry:
   ret i32 0, !dbg !28
 }
 
-attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!8, !9, !10}
diff --git a/llvm/test/DebugInfo/COFF/inlining-padding.ll b/llvm/test/DebugInfo/COFF/inlining-padding.ll
index 75254b3..eeba289 100644
--- a/llvm/test/DebugInfo/COFF/inlining-padding.ll
+++ b/llvm/test/DebugInfo/COFF/inlining-padding.ll
@@ -73,7 +73,7 @@ entry:
   ret i32 0, !dbg !29
 }
 
-attributes #0 = { norecurse nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4}
diff --git a/llvm/test/DebugInfo/COFF/inlining.ll b/llvm/test/DebugInfo/COFF/inlining.ll
index 6953abc..901f53c 100644
--- a/llvm/test/DebugInfo/COFF/inlining.ll
+++ b/llvm/test/DebugInfo/COFF/inlining.ll
@@ -248,7 +248,7 @@ declare void @llvm.lifetime.start(i64, ptr nocapture) #1
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.lifetime.end(i64, ptr nocapture) #1
 
-attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind }
 attributes #2 = { nounwind }
 
diff --git a/llvm/test/DebugInfo/COFF/lambda.ll b/llvm/test/DebugInfo/COFF/lambda.ll
index ad7f954..7cd782d 100644
--- a/llvm/test/DebugInfo/COFF/lambda.ll
+++ b/llvm/test/DebugInfo/COFF/lambda.ll
@@ -103,9 +103,9 @@ entry:
   ret i32 %cond, !dbg !32
 }
 
-attributes #0 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5, !6}
diff --git a/llvm/test/DebugInfo/COFF/lexicalblock.ll b/llvm/test/DebugInfo/COFF/lexicalblock.ll
index 40dd8f8..efcd61f 100644
--- a/llvm/test/DebugInfo/COFF/lexicalblock.ll
+++ b/llvm/test/DebugInfo/COFF/lexicalblock.ll
@@ -199,7 +199,7 @@ declare i32 @llvm.expect.i32(i32, i32) #3
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #2
 
-attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { argmemonly nounwind }
 attributes #3 = { nounwind readnone }
diff --git a/llvm/test/DebugInfo/COFF/lines-difile.ll b/llvm/test/DebugInfo/COFF/lines-difile.ll
index 4c8e50b..973a2af 100644
--- a/llvm/test/DebugInfo/COFF/lines-difile.ll
+++ b/llvm/test/DebugInfo/COFF/lines-difile.ll
@@ -68,9 +68,9 @@ entry:
   ret void
 }
 
-attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #2 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!13, !14, !15, !16}
diff --git a/llvm/test/DebugInfo/COFF/local-constant.ll b/llvm/test/DebugInfo/COFF/local-constant.ll
index cf306e7..a4f4c25 100644
--- a/llvm/test/DebugInfo/COFF/local-constant.ll
+++ b/llvm/test/DebugInfo/COFF/local-constant.ll
@@ -39,8 +39,8 @@ declare void @"\01?useint@@YAXH@Z"(i32) #1
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 attributes #3 = { nounwind }
 
diff --git a/llvm/test/DebugInfo/COFF/local-variable-gap.ll b/llvm/test/DebugInfo/COFF/local-variable-gap.ll
index bc1c313..ea7c9fd 100644
--- a/llvm/test/DebugInfo/COFF/local-variable-gap.ll
+++ b/llvm/test/DebugInfo/COFF/local-variable-gap.ll
@@ -133,9 +133,9 @@ declare void @use(i32) local_unnamed_addr #1
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #3
 
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #2 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #3 = { nounwind readnone }
 attributes #4 = { nounwind }
 attributes #5 = { noreturn nounwind }
diff --git a/llvm/test/DebugInfo/COFF/local-variables.ll b/llvm/test/DebugInfo/COFF/local-variables.ll
index 820f6bd..efcf5c3 100644
--- a/llvm/test/DebugInfo/COFF/local-variables.ll
+++ b/llvm/test/DebugInfo/COFF/local-variables.ll
@@ -250,9 +250,9 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare void @capture(ptr) #2
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #3 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/long-name.ll b/llvm/test/DebugInfo/COFF/long-name.ll
index 4ea5b43..0c3347a 100644
--- a/llvm/test/DebugInfo/COFF/long-name.ll
+++ b/llvm/test/DebugInfo/COFF/long-name.ll
@@ -36,7 +36,7 @@ entry:
   ret void, !dbg !10
 }
 
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/DebugInfo/COFF/multifile.ll b/llvm/test/DebugInfo/COFF/multifile.ll
index 5123b1d..5f54db3 100644
--- a/llvm/test/DebugInfo/COFF/multifile.ll
+++ b/llvm/test/DebugInfo/COFF/multifile.ll
@@ -215,8 +215,8 @@ entry:
 
 declare void @g() #1
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !10}
diff --git a/llvm/test/DebugInfo/COFF/multifunction.ll b/llvm/test/DebugInfo/COFF/multifunction.ll
index 04c6436..2b753f7 100644
--- a/llvm/test/DebugInfo/COFF/multifunction.ll
+++ b/llvm/test/DebugInfo/COFF/multifunction.ll
@@ -680,8 +680,8 @@ entry:
   ret void, !dbg !21
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!11, !12}
diff --git a/llvm/test/DebugInfo/COFF/nrvo.ll b/llvm/test/DebugInfo/COFF/nrvo.ll
index db16c11..6e25477 100644
--- a/llvm/test/DebugInfo/COFF/nrvo.ll
+++ b/llvm/test/DebugInfo/COFF/nrvo.ll
@@ -96,9 +96,9 @@ entry:
   ret i32 %0, !dbg !38
 }
 
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { noinline norecurse nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noinline norecurse nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5, !6}
diff --git a/llvm/test/DebugInfo/COFF/parameter-order.ll b/llvm/test/DebugInfo/COFF/parameter-order.ll
index 23224aa..d240d61 100644
--- a/llvm/test/DebugInfo/COFF/parameter-order.ll
+++ b/llvm/test/DebugInfo/COFF/parameter-order.ll
@@ -81,7 +81,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/parent-type-scopes.ll b/llvm/test/DebugInfo/COFF/parent-type-scopes.ll
index 95c7a55..727b7e4 100644
--- a/llvm/test/DebugInfo/COFF/parent-type-scopes.ll
+++ b/llvm/test/DebugInfo/COFF/parent-type-scopes.ll
@@ -74,7 +74,7 @@ entry:
 ; Function Attrs: nounwind readnone speculatable willreturn
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable willreturn }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/pieces.ll b/llvm/test/DebugInfo/COFF/pieces.ll
index 8e62ad0..88c3b7d 100644
--- a/llvm/test/DebugInfo/COFF/pieces.ll
+++ b/llvm/test/DebugInfo/COFF/pieces.ll
@@ -336,11 +336,11 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { nounwind readonly uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #3 = { nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #4 = { nounwind readonly uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #5 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/purge-typedef-udts.ll b/llvm/test/DebugInfo/COFF/purge-typedef-udts.ll
index 8118e0c..8da583b 100644
--- a/llvm/test/DebugInfo/COFF/purge-typedef-udts.ll
+++ b/llvm/test/DebugInfo/COFF/purge-typedef-udts.ll
@@ -60,7 +60,7 @@ entry:
   ret i32 %call, !dbg !48
 }
 
-attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/register-variables.ll b/llvm/test/DebugInfo/COFF/register-variables.ll
index 84c036c..c0c0cf4 100644
--- a/llvm/test/DebugInfo/COFF/register-variables.ll
+++ b/llvm/test/DebugInfo/COFF/register-variables.ll
@@ -209,8 +209,8 @@ declare void @putint(i32) #1
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "tune-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "tune-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 attributes #3 = { nounwind }
 
diff --git a/llvm/test/DebugInfo/COFF/retained-types.ll b/llvm/test/DebugInfo/COFF/retained-types.ll
index 799ae2b..7c8654f 100644
--- a/llvm/test/DebugInfo/COFF/retained-types.ll
+++ b/llvm/test/DebugInfo/COFF/retained-types.ll
@@ -62,7 +62,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/scopes.ll b/llvm/test/DebugInfo/COFF/scopes.ll
index b33e848..b1e27a4 100644
--- a/llvm/test/DebugInfo/COFF/scopes.ll
+++ b/llvm/test/DebugInfo/COFF/scopes.ll
@@ -102,7 +102,7 @@ entry:
   ret void, !dbg !34
 }
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!13}
diff --git a/llvm/test/DebugInfo/COFF/simple.ll b/llvm/test/DebugInfo/COFF/simple.ll
index f81de98..d450bd0 100644
--- a/llvm/test/DebugInfo/COFF/simple.ll
+++ b/llvm/test/DebugInfo/COFF/simple.ll
@@ -284,8 +284,8 @@ entry:
 
 declare void @g() #1
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !10}
diff --git a/llvm/test/DebugInfo/COFF/static-methods.ll b/llvm/test/DebugInfo/COFF/static-methods.ll
index 4ec6aa6..f0c7e89 100644
--- a/llvm/test/DebugInfo/COFF/static-methods.ll
+++ b/llvm/test/DebugInfo/COFF/static-methods.ll
@@ -98,8 +98,8 @@ entry:
   ret void
 }
 
-attributes #0 = { noinline "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { builtin }
 
 !llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll b/llvm/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
index 896e760..a298d0b 100644
--- a/llvm/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
+++ b/llvm/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
@@ -50,8 +50,8 @@ entry:
 
 declare void @"\01?foo@@YAXXZ"() #1
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/thunk.ll b/llvm/test/DebugInfo/COFF/thunk.ll
index 18fbeac..7ab96c8 100644
--- a/llvm/test/DebugInfo/COFF/thunk.ll
+++ b/llvm/test/DebugInfo/COFF/thunk.ll
@@ -469,13 +469,13 @@ entry:
   ret i1 true, !dbg !102
 }
 
-attributes #0 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #3 = { argmemonly nounwind }
-attributes #4 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #5 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "thunk" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #6 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #5 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "thunk" "use-soft-float"="false" }
+attributes #6 = { noinline optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #7 = { builtin }
 attributes #8 = { nounwind }
 
diff --git a/llvm/test/DebugInfo/COFF/type-quals.ll b/llvm/test/DebugInfo/COFF/type-quals.ll
index de36545..bca2590 100644
--- a/llvm/test/DebugInfo/COFF/type-quals.ll
+++ b/llvm/test/DebugInfo/COFF/type-quals.ll
@@ -477,7 +477,7 @@ entry:
   ret i32 1, !dbg !86
 }
 
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { argmemonly nounwind }
 
diff --git a/llvm/test/DebugInfo/COFF/types-array.ll b/llvm/test/DebugInfo/COFF/types-array.ll
index 5dec93d..a197aeb 100644
--- a/llvm/test/DebugInfo/COFF/types-array.ll
+++ b/llvm/test/DebugInfo/COFF/types-array.ll
@@ -108,10 +108,10 @@ declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture readonly, i32,
 
 declare void @"\01?usevars@@YAXHZZ"(i32, ...) #3
 
-attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { argmemonly nounwind }
-attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4}
diff --git a/llvm/test/DebugInfo/COFF/types-basic.ll b/llvm/test/DebugInfo/COFF/types-basic.ll
index 897c632..80be1d6 100644
--- a/llvm/test/DebugInfo/COFF/types-basic.ll
+++ b/llvm/test/DebugInfo/COFF/types-basic.ll
@@ -626,10 +626,10 @@ entry:
   ret void, !dbg !96
 }
 
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/DebugInfo/COFF/types-calling-conv.ll b/llvm/test/DebugInfo/COFF/types-calling-conv.ll
index 7e51890..1cbebd3 100644
--- a/llvm/test/DebugInfo/COFF/types-calling-conv.ll
+++ b/llvm/test/DebugInfo/COFF/types-calling-conv.ll
@@ -208,8 +208,8 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/types-cvarargs.ll b/llvm/test/DebugInfo/COFF/types-cvarargs.ll
index 95a747f..def9d01 100644
--- a/llvm/test/DebugInfo/COFF/types-cvarargs.ll
+++ b/llvm/test/DebugInfo/COFF/types-cvarargs.ll
@@ -73,9 +73,9 @@ entry:
   ret i32 1, !dbg !35
 }
 
-attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
-attributes #2 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!12, !13, !14, !15}
diff --git a/llvm/test/DebugInfo/COFF/types-data-members.ll b/llvm/test/DebugInfo/COFF/types-data-members.ll
index af0af47..2c980f5 100644
--- a/llvm/test/DebugInfo/COFF/types-data-members.ll
+++ b/llvm/test/DebugInfo/COFF/types-data-members.ll
@@ -816,9 +816,9 @@ ctor.skip_vbases:                                 ; preds = %ctor.init_vbases, %
   ret ptr %5, !dbg !62
 }
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { inlinehint nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { inlinehint nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #3 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/types-method-ref-qualifiers.ll b/llvm/test/DebugInfo/COFF/types-method-ref-qualifiers.ll
index f931b46..0ba2dfe 100644
--- a/llvm/test/DebugInfo/COFF/types-method-ref-qualifiers.ll
+++ b/llvm/test/DebugInfo/COFF/types-method-ref-qualifiers.ll
@@ -41,7 +41,7 @@ entry:
 ; Function Attrs: nounwind readnone speculatable
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/types-recursive-struct.ll b/llvm/test/DebugInfo/COFF/types-recursive-struct.ll
index bd4faf0..8f5e0ba 100644
--- a/llvm/test/DebugInfo/COFF/types-recursive-struct.ll
+++ b/llvm/test/DebugInfo/COFF/types-recursive-struct.ll
@@ -157,7 +157,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/types-recursive-unnamed.ll b/llvm/test/DebugInfo/COFF/types-recursive-unnamed.ll
index 3627253..98ade40 100644
--- a/llvm/test/DebugInfo/COFF/types-recursive-unnamed.ll
+++ b/llvm/test/DebugInfo/COFF/types-recursive-unnamed.ll
@@ -43,7 +43,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/udts.ll b/llvm/test/DebugInfo/COFF/udts.ll
index 86d2c7a..7a3c351 100644
--- a/llvm/test/DebugInfo/COFF/udts.ll
+++ b/llvm/test/DebugInfo/COFF/udts.ll
@@ -124,7 +124,7 @@ define float @"\01?g@@YAMPEAUS@@@Z"(ptr) #0 !dbg !38 {
   ret float %6, !dbg !56
 }
 
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 
 !llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/COFF/unnamed.ll b/llvm/test/DebugInfo/COFF/unnamed.ll
index 06ef86a..396de7a 100644
--- a/llvm/test/DebugInfo/COFF/unnamed.ll
+++ b/llvm/test/DebugInfo/COFF/unnamed.ll
@@ -153,7 +153,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/vframe-csr.ll b/llvm/test/DebugInfo/COFF/vframe-csr.ll
index 866b5e6..f46965a 100644
--- a/llvm/test/DebugInfo/COFF/vframe-csr.ll
+++ b/llvm/test/DebugInfo/COFF/vframe-csr.ll
@@ -136,10 +136,10 @@ declare dso_local void @usecsrs(i32, i32) local_unnamed_addr #3
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #2
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { argmemonly nounwind }
-attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #4 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/vframe-fpo.ll b/llvm/test/DebugInfo/COFF/vframe-fpo.ll
index e8ad8bd..146056f 100644
--- a/llvm/test/DebugInfo/COFF/vframe-fpo.ll
+++ b/llvm/test/DebugInfo/COFF/vframe-fpo.ll
@@ -224,10 +224,10 @@ declare dso_local void @"?g@@YAXAAH00@Z"(ptr dereferenceable(4), ptr dereference
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1
 
-attributes #0 = { norecurse optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind }
 attributes #2 = { nounwind readnone speculatable }
-attributes #3 = { optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #4 = { nounwind }
 attributes #5 = { optsize }
 
diff --git a/llvm/test/DebugInfo/COFF/vftables.ll b/llvm/test/DebugInfo/COFF/vftables.ll
index 1f8db17..b93fefa 100644
--- a/llvm/test/DebugInfo/COFF/vftables.ll
+++ b/llvm/test/DebugInfo/COFF/vftables.ll
@@ -414,11 +414,11 @@ entry:
 
 declare void @"\01?f@D@@UEAAXXZ"(ptr) unnamed_addr #3
 
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { inlinehint nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { inlinehint nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #4 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #5 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/virtual-method-kinds.ll b/llvm/test/DebugInfo/COFF/virtual-method-kinds.ll
index 5180bae..daa4b9c 100644
--- a/llvm/test/DebugInfo/COFF/virtual-method-kinds.ll
+++ b/llvm/test/DebugInfo/COFF/virtual-method-kinds.ll
@@ -217,11 +217,11 @@ entry:
   ret void
 }
 
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nobuiltin "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { inlinehint nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { nobuiltin "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #2 = { inlinehint nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #3 = { nounwind readnone }
-attributes #4 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #5 = { builtin }
 attributes #6 = { nounwind }
 
diff --git a/llvm/test/DebugInfo/COFF/virtual-methods.ll b/llvm/test/DebugInfo/COFF/virtual-methods.ll
index 8f06912..930f96d 100644
--- a/llvm/test/DebugInfo/COFF/virtual-methods.ll
+++ b/llvm/test/DebugInfo/COFF/virtual-methods.ll
@@ -328,11 +328,11 @@ entry:
   ret i32 %2, !dbg !102
 }
 
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { inlinehint nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { inlinehint nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #4 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #5 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/COFF/vtable-optzn-array.ll b/llvm/test/DebugInfo/COFF/vtable-optzn-array.ll
index eb92d6c..718033f 100644
--- a/llvm/test/DebugInfo/COFF/vtable-optzn-array.ll
+++ b/llvm/test/DebugInfo/COFF/vtable-optzn-array.ll
@@ -89,8 +89,8 @@ entry:
   ret void
 }
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { nounwind }
 attributes #3 = { nounwind readnone }
 
diff --git a/llvm/test/DebugInfo/Generic/PR20038.ll b/llvm/test/DebugInfo/Generic/PR20038.ll
index 024a6ab..d4c3f85 100644
--- a/llvm/test/DebugInfo/Generic/PR20038.ll
+++ b/llvm/test/DebugInfo/Generic/PR20038.ll
@@ -109,8 +109,8 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { alwaysinline nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/block-asan.ll b/llvm/test/DebugInfo/Generic/block-asan.ll
index db49289..58b20e2 100644
--- a/llvm/test/DebugInfo/Generic/block-asan.ll
+++ b/llvm/test/DebugInfo/Generic/block-asan.ll
@@ -47,9 +47,9 @@ declare void @bar(i32) #2
 
 declare void @_Block_object_dispose(ptr, i32)
 
-attributes #0 = { nounwind ssp uwtable sanitize_address "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable sanitize_address "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #3 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/constant-pointers.ll b/llvm/test/DebugInfo/Generic/constant-pointers.ll
index 1e69109..772b29b 100644
--- a/llvm/test/DebugInfo/Generic/constant-pointers.ll
+++ b/llvm/test/DebugInfo/Generic/constant-pointers.ll
@@ -22,7 +22,7 @@ entry:
   ret void, !dbg !18
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!15, !16}
diff --git a/llvm/test/DebugInfo/Generic/cross-cu-inlining.ll b/llvm/test/DebugInfo/Generic/cross-cu-inlining.ll
index 91b481c..3922d46 100644
--- a/llvm/test/DebugInfo/Generic/cross-cu-inlining.ll
+++ b/llvm/test/DebugInfo/Generic/cross-cu-inlining.ll
@@ -101,8 +101,8 @@ declare void @llvm.lifetime.start(i64, ptr nocapture) #3
 ; Function Attrs: nounwind
 declare void @llvm.lifetime.end(i64, ptr nocapture) #3
 
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 attributes #3 = { nounwind }
 
diff --git a/llvm/test/DebugInfo/Generic/cross-cu-linkonce.ll b/llvm/test/DebugInfo/Generic/cross-cu-linkonce.ll
index 7030157..023c34b 100644
--- a/llvm/test/DebugInfo/Generic/cross-cu-linkonce.ll
+++ b/llvm/test/DebugInfo/Generic/cross-cu-linkonce.ll
@@ -41,7 +41,7 @@ define linkonce_odr i32 @_Z4funci(i32 %i) #0 !dbg !19 {
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!9, !13}
diff --git a/llvm/test/DebugInfo/Generic/cu-range-hole.ll b/llvm/test/DebugInfo/Generic/cu-range-hole.ll
index 4a6a753..397cfcc 100644
--- a/llvm/test/DebugInfo/Generic/cu-range-hole.ll
+++ b/llvm/test/DebugInfo/Generic/cu-range-hole.ll
@@ -47,7 +47,7 @@ entry:
   ret i32 %add, !dbg !16
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.ident = !{!0, !0}
diff --git a/llvm/test/DebugInfo/Generic/cu-ranges.ll b/llvm/test/DebugInfo/Generic/cu-ranges.ll
index b962bce..dbb50d4 100644
--- a/llvm/test/DebugInfo/Generic/cu-ranges.ll
+++ b/llvm/test/DebugInfo/Generic/cu-ranges.ll
@@ -41,7 +41,7 @@ entry:
   ret i32 %add, !dbg !18
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/dead-argument-order.ll b/llvm/test/DebugInfo/Generic/dead-argument-order.ll
index f6cd809..b4b4cfd 100644
--- a/llvm/test/DebugInfo/Generic/dead-argument-order.ll
+++ b/llvm/test/DebugInfo/Generic/dead-argument-order.ll
@@ -48,7 +48,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/debug-info-always-inline.ll b/llvm/test/DebugInfo/Generic/debug-info-always-inline.ll
index f433838..13bbdc4 100644
--- a/llvm/test/DebugInfo/Generic/debug-info-always-inline.ll
+++ b/llvm/test/DebugInfo/Generic/debug-info-always-inline.ll
@@ -104,10 +104,10 @@ entry:
 
 declare void @_Z3barv() #3
 
-attributes #0 = { alwaysinline nounwind sspstrong "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { alwaysinline nounwind sspstrong "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind sspstrong "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind sspstrong "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #3 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!11, !12}
diff --git a/llvm/test/DebugInfo/Generic/def-line.ll b/llvm/test/DebugInfo/Generic/def-line.ll
index 73d8c08..bec3a88 100644
--- a/llvm/test/DebugInfo/Generic/def-line.ll
+++ b/llvm/test/DebugInfo/Generic/def-line.ll
@@ -59,8 +59,8 @@ entry:
   ret void, !dbg !22
 }
 
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!16, !17}
diff --git a/llvm/test/DebugInfo/Generic/directives-only.ll b/llvm/test/DebugInfo/Generic/directives-only.ll
index 4754df7..f3c3555 100644
--- a/llvm/test/DebugInfo/Generic/directives-only.ll
+++ b/llvm/test/DebugInfo/Generic/directives-only.ll
@@ -34,8 +34,8 @@ entry:
 
 declare void @f1(...) #1
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/Generic/discriminator.ll b/llvm/test/DebugInfo/Generic/discriminator.ll
index e51ce91..4b0993c 100644
--- a/llvm/test/DebugInfo/Generic/discriminator.ll
+++ b/llvm/test/DebugInfo/Generic/discriminator.ll
@@ -24,8 +24,8 @@ define void @_Z3foov() #0 !dbg !4 {
 
 declare void @_Z3xyzv() #1
 
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/Generic/enum-types.ll b/llvm/test/DebugInfo/Generic/enum-types.ll
index 8af9b21..6926c78 100644
--- a/llvm/test/DebugInfo/Generic/enum-types.ll
+++ b/llvm/test/DebugInfo/Generic/enum-types.ll
@@ -40,7 +40,7 @@ entry:
   ret void, !dbg !27
 }
 
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0, !12}
diff --git a/llvm/test/DebugInfo/Generic/enum.ll b/llvm/test/DebugInfo/Generic/enum.ll
index 63665e3..22656d3 100644
--- a/llvm/test/DebugInfo/Generic/enum.ll
+++ b/llvm/test/DebugInfo/Generic/enum.ll
@@ -47,7 +47,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!8}
diff --git a/llvm/test/DebugInfo/Generic/extended-loc-directive.ll b/llvm/test/DebugInfo/Generic/extended-loc-directive.ll
index 901830d..dbd98a5 100644
--- a/llvm/test/DebugInfo/Generic/extended-loc-directive.ll
+++ b/llvm/test/DebugInfo/Generic/extended-loc-directive.ll
@@ -46,8 +46,8 @@ entry:
 
 declare void @f1(...) #1
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/Generic/global-sra-array.ll b/llvm/test/DebugInfo/Generic/global-sra-array.ll
index 8076efd..d2e363c 100644
--- a/llvm/test/DebugInfo/Generic/global-sra-array.ll
+++ b/llvm/test/DebugInfo/Generic/global-sra-array.ll
@@ -70,7 +70,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 ; Function Attrs: nounwind readnone speculatable
 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind optsize ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind optsize ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { optsize }
 
diff --git a/llvm/test/DebugInfo/Generic/global.ll b/llvm/test/DebugInfo/Generic/global.ll
index ddb3f8b..152f3de 100644
--- a/llvm/test/DebugInfo/Generic/global.ll
+++ b/llvm/test/DebugInfo/Generic/global.ll
@@ -21,7 +21,7 @@ entry:
   ret i32 0, !dbg !12
 }
 
-attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!7, !8}
diff --git a/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll b/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll
index 253b7c3..c91ec40 100644
--- a/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll
+++ b/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll
@@ -325,9 +325,9 @@ declare void @__asan_unregister_globals(i64, i64)
 
 declare void @__sanitizer_cov_module_init(i64)
 
-attributes #0 = { noreturn sanitize_address "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { sanitize_address "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noreturn sanitize_address "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { sanitize_address "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #3 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc1.ll b/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc1.ll
index 3475533..9399eb7 100644
--- a/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc1.ll
+++ b/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc1.ll
@@ -47,8 +47,8 @@ declare i32 @_Z4funcv() #1
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/inline-no-debug-info.ll b/llvm/test/DebugInfo/Generic/inline-no-debug-info.ll
index 88d8794..f1db6c0 100644
--- a/llvm/test/DebugInfo/Generic/inline-no-debug-info.ll
+++ b/llvm/test/DebugInfo/Generic/inline-no-debug-info.ll
@@ -49,7 +49,7 @@ entry:
   ret void, !dbg !12
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/Generic/inline-scopes.ll b/llvm/test/DebugInfo/Generic/inline-scopes.ll
index 45ecdd0..4058487 100644
--- a/llvm/test/DebugInfo/Generic/inline-scopes.ll
+++ b/llvm/test/DebugInfo/Generic/inline-scopes.ll
@@ -98,9 +98,9 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare zeroext i1 @_Z1fv() #2
 
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!13, !14}
diff --git a/llvm/test/DebugInfo/Generic/inlined-arguments.ll b/llvm/test/DebugInfo/Generic/inlined-arguments.ll
index a802384..3b0ee2a 100644
--- a/llvm/test/DebugInfo/Generic/inlined-arguments.ll
+++ b/llvm/test/DebugInfo/Generic/inlined-arguments.ll
@@ -41,8 +41,8 @@ declare void @_Z2f3i(i32) #1
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/inlined-strings.ll b/llvm/test/DebugInfo/Generic/inlined-strings.ll
index ea68c9f..64484a3 100644
--- a/llvm/test/DebugInfo/Generic/inlined-strings.ll
+++ b/llvm/test/DebugInfo/Generic/inlined-strings.ll
@@ -22,7 +22,7 @@ entry:
   ret i32 0, !dbg !12
 }
 
-attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!7, !8}
diff --git a/llvm/test/DebugInfo/Generic/lto-comp-dir.ll b/llvm/test/DebugInfo/Generic/lto-comp-dir.ll
index 873a18f..05dbb24 100644
--- a/llvm/test/DebugInfo/Generic/lto-comp-dir.ll
+++ b/llvm/test/DebugInfo/Generic/lto-comp-dir.ll
@@ -57,8 +57,8 @@ entry:
   ret i32 0, !dbg !21
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0, !8}
 !llvm.module.flags = !{!16, !17}
diff --git a/llvm/test/DebugInfo/Generic/mainsubprogram.ll b/llvm/test/DebugInfo/Generic/mainsubprogram.ll
index 24a3a7a..30b8c1f 100644
--- a/llvm/test/DebugInfo/Generic/mainsubprogram.ll
+++ b/llvm/test/DebugInfo/Generic/mainsubprogram.ll
@@ -15,7 +15,7 @@ entry:
   ret i32 0, !dbg !10
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !11}
diff --git a/llvm/test/DebugInfo/Generic/member-order.ll b/llvm/test/DebugInfo/Generic/member-order.ll
index a2965cc..805e1e8 100644
--- a/llvm/test/DebugInfo/Generic/member-order.ll
+++ b/llvm/test/DebugInfo/Generic/member-order.ll
@@ -35,7 +35,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/multiline.ll b/llvm/test/DebugInfo/Generic/multiline.ll
index 12476db..be3f10b 100644
--- a/llvm/test/DebugInfo/Generic/multiline.ll
+++ b/llvm/test/DebugInfo/Generic/multiline.ll
@@ -52,8 +52,8 @@ entry:
 
 declare void @f1(...) #1
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/Generic/namespace.ll b/llvm/test/DebugInfo/Generic/namespace.ll
index 2db69f1..48b4093 100644
--- a/llvm/test/DebugInfo/Generic/namespace.ll
+++ b/llvm/test/DebugInfo/Generic/namespace.ll
@@ -266,7 +266,7 @@ entry:
   ret void, !dbg !74
 }
 
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/namespace_function_definition.ll b/llvm/test/DebugInfo/Generic/namespace_function_definition.ll
index 5c5543f..f69bc07 100644
--- a/llvm/test/DebugInfo/Generic/namespace_function_definition.ll
+++ b/llvm/test/DebugInfo/Generic/namespace_function_definition.ll
@@ -22,7 +22,7 @@ entry:
   ret void, !dbg !11
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/Generic/namespace_inline_function_definition.ll b/llvm/test/DebugInfo/Generic/namespace_inline_function_definition.ll
index d9ad39b..3960aca 100644
--- a/llvm/test/DebugInfo/Generic/namespace_inline_function_definition.ll
+++ b/llvm/test/DebugInfo/Generic/namespace_inline_function_definition.ll
@@ -60,8 +60,8 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
 
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/recursive_inlining.ll b/llvm/test/DebugInfo/Generic/recursive_inlining.ll
index 908e408..3174fc3 100644
--- a/llvm/test/DebugInfo/Generic/recursive_inlining.ll
+++ b/llvm/test/DebugInfo/Generic/recursive_inlining.ll
@@ -186,8 +186,8 @@ declare void @_Z3fn2iiii(i32, i32, i32, i32) #1
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 attributes #3 = { nounwind }
 
diff --git a/llvm/test/DebugInfo/Generic/restrict.ll b/llvm/test/DebugInfo/Generic/restrict.ll
index 0131e53..f508ec6 100644
--- a/llvm/test/DebugInfo/Generic/restrict.ll
+++ b/llvm/test/DebugInfo/Generic/restrict.ll
@@ -26,7 +26,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/tu-composite.ll b/llvm/test/DebugInfo/Generic/tu-composite.ll
index bcfe049..a880ac5f 100644
--- a/llvm/test/DebugInfo/Generic/tu-composite.ll
+++ b/llvm/test/DebugInfo/Generic/tu-composite.ll
@@ -115,7 +115,7 @@ entry:
   ret void, !dbg !58
 }
 
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/unconditional-branch.ll b/llvm/test/DebugInfo/Generic/unconditional-branch.ll
index 9f8f975..287f08f 100644
--- a/llvm/test/DebugInfo/Generic/unconditional-branch.ll
+++ b/llvm/test/DebugInfo/Generic/unconditional-branch.ll
@@ -37,7 +37,7 @@ sw.default:                                       ; preds = %entry
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Generic/version.ll b/llvm/test/DebugInfo/Generic/version.ll
index b68528d..08a4f1a 100644
--- a/llvm/test/DebugInfo/Generic/version.ll
+++ b/llvm/test/DebugInfo/Generic/version.ll
@@ -11,7 +11,7 @@ entry:
   ret i32 0, !dbg !10
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !11}
diff --git a/llvm/test/DebugInfo/Inputs/gmlt.ll b/llvm/test/DebugInfo/Inputs/gmlt.ll
index 6f112fc..c9db2d2 100644
--- a/llvm/test/DebugInfo/Inputs/gmlt.ll
+++ b/llvm/test/DebugInfo/Inputs/gmlt.ll
@@ -110,8 +110,8 @@ entry:
   ret void, !dbg !19
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Inputs/line.ll b/llvm/test/DebugInfo/Inputs/line.ll
index f4d6db9..24628cf 100644
--- a/llvm/test/DebugInfo/Inputs/line.ll
+++ b/llvm/test/DebugInfo/Inputs/line.ll
@@ -33,7 +33,7 @@ land.end:                                         ; preds = %land.rhs, %entry
   ret i32 %conv, !dbg !13
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!7, !8}
diff --git a/llvm/test/DebugInfo/MSP430/cu-ranges.ll b/llvm/test/DebugInfo/MSP430/cu-ranges.ll
index 5c15c31..8f7cf407 100644
--- a/llvm/test/DebugInfo/MSP430/cu-ranges.ll
+++ b/llvm/test/DebugInfo/MSP430/cu-ranges.ll
@@ -46,7 +46,7 @@ entry:
   ret i32 %add, !dbg !18
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/Mips/fn-call-line.ll b/llvm/test/DebugInfo/Mips/fn-call-line.ll
index c12a47b..2f02040f 100644
--- a/llvm/test/DebugInfo/Mips/fn-call-line.ll
+++ b/llvm/test/DebugInfo/Mips/fn-call-line.ll
@@ -61,8 +61,8 @@ entry:
 
 declare void @f1(...) #1
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll b/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll
index 1f4c44e..c1288e8 100644
--- a/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll
+++ b/llvm/test/DebugInfo/NVPTX/cu-range-hole.ll
@@ -285,7 +285,7 @@ entry:
 ; CHECK-NEXT: 	.section	.debug_macinfo	{	}
 ; CHECK-NOT: debug_
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.ident = !{!0, !0}
diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll
index 4624dce..9ce0b73 100644
--- a/llvm/test/DebugInfo/NVPTX/debug-info.ll
+++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll
@@ -2675,7 +2675,7 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1
 ; Function Attrs: nounwind readnone speculatable
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_20" "target-features"="+ptx42" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_20" "target-features"="+ptx42" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind readnone speculatable }
 attributes #3 = { nounwind }
diff --git a/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll b/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll
index 7a58cae..e7e09a5 100644
--- a/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll
+++ b/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll
@@ -88,9 +88,9 @@ if.end4:                                          ; preds = %if.then2, %if.end
 
 declare signext i8 @_ZN1A3fooEv(ptr) #2
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0, !9}
 !llvm.module.flags = !{!18, !19}
diff --git a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll
index 65542e8..6aaedf4 100644
--- a/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll
+++ b/llvm/test/DebugInfo/RISCV/dwarf-riscv-relocs.ll
@@ -118,7 +118,7 @@ entry:
 
 declare void @ext()
 
-attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+relax" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+relax" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/DebugInfo/Sparc/gnu-window-save.ll b/llvm/test/DebugInfo/Sparc/gnu-window-save.ll
index cc2e764..3fab8dd 100644
--- a/llvm/test/DebugInfo/Sparc/gnu-window-save.ll
+++ b/llvm/test/DebugInfo/Sparc/gnu-window-save.ll
@@ -48,8 +48,8 @@ entry:
 
 declare signext i32 @printf(ptr, ...) #1
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !10}
diff --git a/llvm/test/DebugInfo/WebAssembly/dbg-loop-loc.ll b/llvm/test/DebugInfo/WebAssembly/dbg-loop-loc.ll
index 7178ef3..1fc1c84 100644
--- a/llvm/test/DebugInfo/WebAssembly/dbg-loop-loc.ll
+++ b/llvm/test/DebugInfo/WebAssembly/dbg-loop-loc.ll
@@ -79,7 +79,7 @@ for.end:                                          ; preds = %for.cond
 ; Function Attrs: nounwind readnone speculatable
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/WebAssembly/debugtest-opt.ll b/llvm/test/DebugInfo/WebAssembly/debugtest-opt.ll
index 1b552b5..603f28a 100644
--- a/llvm/test/DebugInfo/WebAssembly/debugtest-opt.ll
+++ b/llvm/test/DebugInfo/WebAssembly/debugtest-opt.ll
@@ -33,7 +33,7 @@ entry:
 ; Function Attrs: nounwind readnone speculatable willreturn
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind willreturn }
 attributes #2 = { nounwind readnone speculatable willreturn }
 attributes #3 = { nounwind }
diff --git a/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll b/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll
index 52b3688..ebbe357 100644
--- a/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll
+++ b/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll
@@ -56,7 +56,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll b/llvm/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll
index 81e2e33..dda757ab 100644
--- a/llvm/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll
+++ b/llvm/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll
@@ -25,7 +25,7 @@ entry:
   ret i32 0, !dbg !10
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !11}
diff --git a/llvm/test/DebugInfo/X86/addr_comments.ll b/llvm/test/DebugInfo/X86/addr_comments.ll
index 58050d2..c451606 100644
--- a/llvm/test/DebugInfo/X86/addr_comments.ll
+++ b/llvm/test/DebugInfo/X86/addr_comments.ll
@@ -16,7 +16,7 @@ entry:
   ret void, !dbg !10
 }
 
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/DebugInfo/X86/arguments.ll b/llvm/test/DebugInfo/X86/arguments.ll
index 767c138..69ef9a6 100644
--- a/llvm/test/DebugInfo/X86/arguments.ll
+++ b/llvm/test/DebugInfo/X86/arguments.ll
@@ -41,7 +41,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/coff_debug_info_type.ll b/llvm/test/DebugInfo/X86/coff_debug_info_type.ll
index a40b511..f70973d 100644
--- a/llvm/test/DebugInfo/X86/coff_debug_info_type.ll
+++ b/llvm/test/DebugInfo/X86/coff_debug_info_type.ll
@@ -26,7 +26,7 @@ entry:
   ret i32 0, !dbg !10
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !11}
diff --git a/llvm/test/DebugInfo/X86/coff_relative_names.ll b/llvm/test/DebugInfo/X86/coff_relative_names.ll
index ea6d856..81e6dd58 100644
--- a/llvm/test/DebugInfo/X86/coff_relative_names.ll
+++ b/llvm/test/DebugInfo/X86/coff_relative_names.ll
@@ -18,7 +18,7 @@ entry:
   ret i32 0, !dbg !10
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !11}
diff --git a/llvm/test/DebugInfo/X86/convert-loclist.ll b/llvm/test/DebugInfo/X86/convert-loclist.ll
index 0fb15d5..f1aa5dc 100644
--- a/llvm/test/DebugInfo/X86/convert-loclist.ll
+++ b/llvm/test/DebugInfo/X86/convert-loclist.ll
@@ -60,8 +60,8 @@ declare !dbg !4 dso_local void @_Z2f1v() local_unnamed_addr #1
 ; Function Attrs: nounwind readnone speculatable willreturn
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone speculatable willreturn }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/cu-ranges-odr.ll b/llvm/test/DebugInfo/X86/cu-ranges-odr.ll
index ac58e2f..d1690d3 100644
--- a/llvm/test/DebugInfo/X86/cu-ranges-odr.ll
+++ b/llvm/test/DebugInfo/X86/cu-ranges-odr.ll
@@ -55,7 +55,7 @@ entry:
   ret void, !dbg !31
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!11}
diff --git a/llvm/test/DebugInfo/X86/cu-ranges.ll b/llvm/test/DebugInfo/X86/cu-ranges.ll
index 1afbdc5..b1b43ae 100644
--- a/llvm/test/DebugInfo/X86/cu-ranges.ll
+++ b/llvm/test/DebugInfo/X86/cu-ranges.ll
@@ -60,7 +60,7 @@ entry:
   ret i32 %add, !dbg !16
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/dbg_value_direct.ll b/llvm/test/DebugInfo/X86/dbg_value_direct.ll
index 4ac2541..afd60e2 100644
--- a/llvm/test/DebugInfo/X86/dbg_value_direct.ll
+++ b/llvm/test/DebugInfo/X86/dbg_value_direct.ll
@@ -140,9 +140,9 @@ declare void @__asan_register_globals(i64, i64)
 
 declare void @__asan_unregister_globals(i64, i64)
 
-attributes #0 = { sanitize_address uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { sanitize_address uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!22, !27}
diff --git a/llvm/test/DebugInfo/X86/debug-dead-local-var.ll b/llvm/test/DebugInfo/X86/debug-dead-local-var.ll
index 6e87ece..6aecbfa 100644
--- a/llvm/test/DebugInfo/X86/debug-dead-local-var.ll
+++ b/llvm/test/DebugInfo/X86/debug-dead-local-var.ll
@@ -24,7 +24,7 @@ entry:
   ret i32 1, !dbg !21
 }
 
-attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!18, !19}
diff --git a/llvm/test/DebugInfo/X86/debug-info-blocks.ll b/llvm/test/DebugInfo/X86/debug-info-blocks.ll
index 759e703..8924b30 100644
--- a/llvm/test/DebugInfo/X86/debug-info-blocks.ll
+++ b/llvm/test/DebugInfo/X86/debug-info-blocks.ll
@@ -259,7 +259,7 @@ define i32 @main() #0 !dbg !36 {
   ret i32 0, !dbg !109
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nonlazybind }
 attributes #3 = { nounwind }
diff --git a/llvm/test/DebugInfo/X86/debug-loc-asan.mir b/llvm/test/DebugInfo/X86/debug-loc-asan.mir
index e50bd60..e1d9651 100644
--- a/llvm/test/DebugInfo/X86/debug-loc-asan.mir
+++ b/llvm/test/DebugInfo/X86/debug-loc-asan.mir
@@ -160,7 +160,7 @@
   ; Function Attrs: nounwind
   declare void @llvm.stackprotector(ptr, ptr) #2
   
-  attributes #0 = { nounwind sanitize_address uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { nounwind sanitize_address uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
   attributes #1 = { nounwind readnone speculatable }
   attributes #2 = { nounwind }
   
diff --git a/llvm/test/DebugInfo/X86/debug-loc-offset.mir b/llvm/test/DebugInfo/X86/debug-loc-offset.mir
index 5c35626..c07ea68 100644
--- a/llvm/test/DebugInfo/X86/debug-loc-offset.mir
+++ b/llvm/test/DebugInfo/X86/debug-loc-offset.mir
@@ -133,9 +133,9 @@
   ; Function Attrs: nounwind
   declare void @llvm.stackprotector(ptr, ptr) #3
   
-  attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
   attributes #1 = { nounwind readnone speculatable }
-  attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
   attributes #3 = { nounwind }
   
   !llvm.dbg.cu = !{!0, !3}
diff --git a/llvm/test/DebugInfo/X86/debug-ranges-offset.ll b/llvm/test/DebugInfo/X86/debug-ranges-offset.ll
index b071225..24cc73c 100644
--- a/llvm/test/DebugInfo/X86/debug-ranges-offset.ll
+++ b/llvm/test/DebugInfo/X86/debug-ranges-offset.ll
@@ -189,8 +189,8 @@ declare ptr @__msan_memset(ptr, i32, i64)
 ; Function Attrs: nounwind
 declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) #3
 
-attributes #0 = { sanitize_memory uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nobuiltin "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { sanitize_memory uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { nobuiltin "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 attributes #3 = { nounwind }
 attributes #4 = { builtin }
diff --git a/llvm/test/DebugInfo/X86/decl-derived-member.ll b/llvm/test/DebugInfo/X86/decl-derived-member.ll
index 8d32d3c..822bb8e 100644
--- a/llvm/test/DebugInfo/X86/decl-derived-member.ll
+++ b/llvm/test/DebugInfo/X86/decl-derived-member.ll
@@ -88,11 +88,11 @@ entry:
   ret void
 }
 
-attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { inlinehint uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { inlinehint uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind }
 attributes #3 = { nounwind readnone }
-attributes #4 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!8}
 !llvm.module.flags = !{!12, !13}
diff --git a/llvm/test/DebugInfo/X86/discriminator.ll b/llvm/test/DebugInfo/X86/discriminator.ll
index ef89838..0872ea8 100644
--- a/llvm/test/DebugInfo/X86/discriminator.ll
+++ b/llvm/test/DebugInfo/X86/discriminator.ll
@@ -35,7 +35,7 @@ return:                                           ; preds = %if.end, %if.then
   ret i32 %2, !dbg !13
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!7, !8}
diff --git a/llvm/test/DebugInfo/X86/discriminator2.ll b/llvm/test/DebugInfo/X86/discriminator2.ll
index f966372..07cda30 100644
--- a/llvm/test/DebugInfo/X86/discriminator2.ll
+++ b/llvm/test/DebugInfo/X86/discriminator2.ll
@@ -32,8 +32,8 @@ declare void @_Z3fooii(i32, i32) #1
 
 declare i32 @_Z3barv() #1
 
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4}
diff --git a/llvm/test/DebugInfo/X86/discriminator3.ll b/llvm/test/DebugInfo/X86/discriminator3.ll
index 19a7764..1898599 100644
--- a/llvm/test/DebugInfo/X86/discriminator3.ll
+++ b/llvm/test/DebugInfo/X86/discriminator3.ll
@@ -35,9 +35,9 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare void @_Z3fooi(i32) #2
 
-attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4}
diff --git a/llvm/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll b/llvm/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
index da0d8139..08f071d 100644
--- a/llvm/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
+++ b/llvm/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
@@ -53,8 +53,8 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readonly uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { nounwind readonly uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!4}
diff --git a/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll b/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll
index b06179d..857c1da 100644
--- a/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll
+++ b/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll
@@ -49,7 +49,7 @@ entry:
   ret i32 %0, !dbg !15
 }
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!5}
 !llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/X86/dwarf-pubnames-split.ll b/llvm/test/DebugInfo/X86/dwarf-pubnames-split.ll
index d1f1ae3..bbfe2b1 100644
--- a/llvm/test/DebugInfo/X86/dwarf-pubnames-split.ll
+++ b/llvm/test/DebugInfo/X86/dwarf-pubnames-split.ll
@@ -19,7 +19,7 @@ entry:
   ret i32 0, !dbg !10
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !11}
diff --git a/llvm/test/DebugInfo/X86/fission-inline.ll b/llvm/test/DebugInfo/X86/fission-inline.ll
index d038b07..13ab43f 100644
--- a/llvm/test/DebugInfo/X86/fission-inline.ll
+++ b/llvm/test/DebugInfo/X86/fission-inline.ll
@@ -90,8 +90,8 @@ entry:
 
 declare void @_Z2f1v() #1
 
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!22, !23}
diff --git a/llvm/test/DebugInfo/X86/fission-no-inline-gsym.ll b/llvm/test/DebugInfo/X86/fission-no-inline-gsym.ll
index 6c9885f..447601c 100644
--- a/llvm/test/DebugInfo/X86/fission-no-inline-gsym.ll
+++ b/llvm/test/DebugInfo/X86/fission-no-inline-gsym.ll
@@ -46,8 +46,8 @@ entry:
 
 declare void @_Z2f1v() #1
 
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!22, !23}
diff --git a/llvm/test/DebugInfo/X86/fission-no-inlining.ll b/llvm/test/DebugInfo/X86/fission-no-inlining.ll
index 7ae5c2b..a4a3ac6 100644
--- a/llvm/test/DebugInfo/X86/fission-no-inlining.ll
+++ b/llvm/test/DebugInfo/X86/fission-no-inlining.ll
@@ -22,8 +22,8 @@ entry:
 
 declare void @_Z2f1v() #1
 
-attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3}
diff --git a/llvm/test/DebugInfo/X86/fission-ranges.ll b/llvm/test/DebugInfo/X86/fission-ranges.ll
index 10d2150c..be0a375 100644
--- a/llvm/test/DebugInfo/X86/fission-ranges.ll
+++ b/llvm/test/DebugInfo/X86/fission-ranges.ll
@@ -187,7 +187,7 @@ for.end18:                                        ; preds = %for.inc16
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/generate-odr-hash.ll b/llvm/test/DebugInfo/X86/generate-odr-hash.ll
index 5e583a3..ad083ab 100644
--- a/llvm/test/DebugInfo/X86/generate-odr-hash.ll
+++ b/llvm/test/DebugInfo/X86/generate-odr-hash.ll
@@ -215,7 +215,7 @@ entry:
   ret void, !dbg !57
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!34}
diff --git a/llvm/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll b/llvm/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll
index f80b3ac..d33d357 100644
--- a/llvm/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll
+++ b/llvm/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll
@@ -60,7 +60,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/gmlt-empty-base-address.ll b/llvm/test/DebugInfo/X86/gmlt-empty-base-address.ll
index 727a3c3..d01184d 100644
--- a/llvm/test/DebugInfo/X86/gmlt-empty-base-address.ll
+++ b/llvm/test/DebugInfo/X86/gmlt-empty-base-address.ll
@@ -16,7 +16,7 @@ define dso_local void @_Z2f2v() #0 !dbg !10 {
   ret void, !dbg !11
 }
 
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/DebugInfo/X86/gnu-public-names-gmlt.ll b/llvm/test/DebugInfo/X86/gnu-public-names-gmlt.ll
index 3984ad4..adac419 100644
--- a/llvm/test/DebugInfo/X86/gnu-public-names-gmlt.ll
+++ b/llvm/test/DebugInfo/X86/gnu-public-names-gmlt.ll
@@ -39,8 +39,8 @@ entry:
 
 declare void @_Z2f1v() #1
 
-attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/DebugInfo/X86/gnu-public-names.ll b/llvm/test/DebugInfo/X86/gnu-public-names.ll
index 5dd6449..6b1279b 100644
--- a/llvm/test/DebugInfo/X86/gnu-public-names.ll
+++ b/llvm/test/DebugInfo/X86/gnu-public-names.ll
@@ -304,7 +304,7 @@ entry:
   ret i32 %add5, !dbg !90
 }
 
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 
 !llvm.dbg.cu = !{!2}
diff --git a/llvm/test/DebugInfo/X86/inline-member-function.ll b/llvm/test/DebugInfo/X86/inline-member-function.ll
index ecb65e9..dee0c82 100644
--- a/llvm/test/DebugInfo/X86/inline-member-function.ll
+++ b/llvm/test/DebugInfo/X86/inline-member-function.ll
@@ -60,7 +60,7 @@ entry:
 
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!4}
diff --git a/llvm/test/DebugInfo/X86/inline-seldag-test.ll b/llvm/test/DebugInfo/X86/inline-seldag-test.ll
index 6a62d2b..874a01e 100644
--- a/llvm/test/DebugInfo/X86/inline-seldag-test.ll
+++ b/llvm/test/DebugInfo/X86/inline-seldag-test.ll
@@ -41,7 +41,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/lexical_block.ll b/llvm/test/DebugInfo/X86/lexical_block.ll
index 8063427..08df740 100644
--- a/llvm/test/DebugInfo/X86/lexical_block.ll
+++ b/llvm/test/DebugInfo/X86/lexical_block.ll
@@ -40,7 +40,7 @@ if.end:                                           ; preds = %if.then, %entry
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/line-info.ll b/llvm/test/DebugInfo/X86/line-info.ll
index 5884ee1..ac208f3 100644
--- a/llvm/test/DebugInfo/X86/line-info.ll
+++ b/llvm/test/DebugInfo/X86/line-info.ll
@@ -32,7 +32,7 @@ entry:
   ret i32 0, !dbg !17
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/low-pc-cu.ll b/llvm/test/DebugInfo/X86/low-pc-cu.ll
index e56c7b9..4be5014 100644
--- a/llvm/test/DebugInfo/X86/low-pc-cu.ll
+++ b/llvm/test/DebugInfo/X86/low-pc-cu.ll
@@ -26,7 +26,7 @@ entry:
   ret void, !dbg !11
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/X86/mi-print.ll b/llvm/test/DebugInfo/X86/mi-print.ll
index 8067006..b35b0d2 100644
--- a/llvm/test/DebugInfo/X86/mi-print.ll
+++ b/llvm/test/DebugInfo/X86/mi-print.ll
@@ -24,7 +24,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/missing-abstract-variable.ll b/llvm/test/DebugInfo/X86/missing-abstract-variable.ll
index 2136380..c31c442 100644
--- a/llvm/test/DebugInfo/X86/missing-abstract-variable.ll
+++ b/llvm/test/DebugInfo/X86/missing-abstract-variable.ll
@@ -122,8 +122,8 @@ declare void @_Z1fi(i32) #1
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/no_debug_ranges.ll b/llvm/test/DebugInfo/X86/no_debug_ranges.ll
index b0fd815..f961e45 100644
--- a/llvm/test/DebugInfo/X86/no_debug_ranges.ll
+++ b/llvm/test/DebugInfo/X86/no_debug_ranges.ll
@@ -33,7 +33,7 @@ entry:
   ret void, !dbg !12
 }
 
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5}
diff --git a/llvm/test/DebugInfo/X86/nodebug.ll b/llvm/test/DebugInfo/X86/nodebug.ll
index 5495886f..5dbe65b 100644
--- a/llvm/test/DebugInfo/X86/nodebug.ll
+++ b/llvm/test/DebugInfo/X86/nodebug.ll
@@ -35,7 +35,7 @@ entry:
   ret void
 }
 
-attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !9}
diff --git a/llvm/test/DebugInfo/X86/nodebug_with_debug_loc.ll b/llvm/test/DebugInfo/X86/nodebug_with_debug_loc.ll
index 6bcc349..e945fcc 100644
--- a/llvm/test/DebugInfo/X86/nodebug_with_debug_loc.ll
+++ b/llvm/test/DebugInfo/X86/nodebug_with_debug_loc.ll
@@ -84,8 +84,8 @@ declare void @llvm.lifetime.start(i64, ptr nocapture) #3
 ; Function Attrs: nounwind
 declare void @llvm.lifetime.end(i64, ptr nocapture) #3
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 attributes #3 = { nounwind }
 
diff --git a/llvm/test/DebugInfo/X86/objc-property-void.ll b/llvm/test/DebugInfo/X86/objc-property-void.ll
index 4c5c16a..431ea00 100644
--- a/llvm/test/DebugInfo/X86/objc-property-void.ll
+++ b/llvm/test/DebugInfo/X86/objc-property-void.ll
@@ -66,7 +66,7 @@ entry:
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/pieces-4.ll b/llvm/test/DebugInfo/X86/pieces-4.ll
index 7e27f10..46f6201 100644
--- a/llvm/test/DebugInfo/X86/pieces-4.ll
+++ b/llvm/test/DebugInfo/X86/pieces-4.ll
@@ -53,9 +53,9 @@ declare i32 @g() local_unnamed_addr #2
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #3 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/pr19307.mir b/llvm/test/DebugInfo/X86/pr19307.mir
index 4ba1ca0..05e0ba5 100644
--- a/llvm/test/DebugInfo/X86/pr19307.mir
+++ b/llvm/test/DebugInfo/X86/pr19307.mir
@@ -77,9 +77,9 @@
   ; Function Attrs: nounwind
   declare void @llvm.stackprotector(ptr, ptr) #3
   
-  attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
   attributes #1 = { nounwind readnone speculatable }
-  attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
   attributes #3 = { nounwind }
   
   !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/pr28270.ll b/llvm/test/DebugInfo/X86/pr28270.ll
index e537be5..6e6ffbc 100644
--- a/llvm/test/DebugInfo/X86/pr28270.ll
+++ b/llvm/test/DebugInfo/X86/pr28270.ll
@@ -52,9 +52,9 @@ declare void @_ZN1AC1EPKc(ptr, ptr) unnamed_addr #2
 ; Function Attrs: nounwind readnone
 declare void @llvm.dbg.value(metadata, metadata, metadata) #3
 
-attributes #0 = { noreturn uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noreturn uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { argmemonly nounwind }
-attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #3 = { nounwind readnone }
 attributes #4 = { nounwind }
 
diff --git a/llvm/test/DebugInfo/X86/pr45181.ll b/llvm/test/DebugInfo/X86/pr45181.ll
index 3a46930..b973f7d 100644
--- a/llvm/test/DebugInfo/X86/pr45181.ll
+++ b/llvm/test/DebugInfo/X86/pr45181.ll
@@ -140,11 +140,11 @@ declare void @_ZN2aa1yIP1jNS_2ac1zI1eEEED1Ev(ptr) unnamed_addr #4
 ; Function Attrs: nounwind readnone speculatable willreturn
 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
-attributes #0 = { optsize ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { optsize ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable willreturn }
 attributes #2 = { argmemonly nounwind willreturn }
-attributes #3 = { optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { nounwind optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "use-soft-float"="false" }
+attributes #4 = { nounwind optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "use-soft-float"="false" }
 attributes #5 = { optsize }
 attributes #6 = { nounwind }
 attributes #7 = { nounwind optsize }
diff --git a/llvm/test/DebugInfo/X86/safestack-byval.ll b/llvm/test/DebugInfo/X86/safestack-byval.ll
index acaa803..5336e58 100644
--- a/llvm/test/DebugInfo/X86/safestack-byval.ll
+++ b/llvm/test/DebugInfo/X86/safestack-byval.ll
@@ -48,7 +48,7 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 ; Function Attrs: argmemonly nounwind
 declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) #2
 
-attributes #0 = { norecurse nounwind readonly safestack uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind readonly safestack uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { argmemonly nounwind }
 
diff --git a/llvm/test/DebugInfo/X86/set.ll b/llvm/test/DebugInfo/X86/set.ll
index 2bfb08f..89e855ca 100644
--- a/llvm/test/DebugInfo/X86/set.ll
+++ b/llvm/test/DebugInfo/X86/set.ll
@@ -67,7 +67,7 @@ if_1:                                             ; preds = %else_1, %second
   ret ptr @M_Main, !dbg !45
 }
 
-attributes #0 = { "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/spill-nospill.ll b/llvm/test/DebugInfo/X86/spill-nospill.ll
index 94e47ae..5b08a4bd 100644
--- a/llvm/test/DebugInfo/X86/spill-nospill.ll
+++ b/llvm/test/DebugInfo/X86/spill-nospill.ll
@@ -75,8 +75,8 @@ declare i32 @g(i32) local_unnamed_addr #1
 ; Function Attrs: nounwind readnone speculatable
 declare void @llvm.dbg.value(metadata, metadata, metadata) #2
 
-attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone speculatable }
 attributes #3 = { nounwind }
 
diff --git a/llvm/test/DebugInfo/X86/sret.ll b/llvm/test/DebugInfo/X86/sret.ll
index 567f500..ff24504 100644
--- a/llvm/test/DebugInfo/X86/sret.ll
+++ b/llvm/test/DebugInfo/X86/sret.ll
@@ -260,11 +260,11 @@ eh.resume:                                        ; preds = %lpad
 ; Function Attrs: nobuiltin nounwind
 declare void @_ZdlPv(ptr) #4
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #3 = { noinline noreturn nounwind }
-attributes #4 = { nobuiltin nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nobuiltin nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 attributes #5 = { noreturn nounwind }
 attributes #6 = { nounwind }
 attributes #7 = { builtin nounwind }
diff --git a/llvm/test/DebugInfo/X86/tls.ll b/llvm/test/DebugInfo/X86/tls.ll
index d9c1744..f6f6b6f 100644
--- a/llvm/test/DebugInfo/X86/tls.ll
+++ b/llvm/test/DebugInfo/X86/tls.ll
@@ -106,7 +106,7 @@ entry:
   ret i32 0, !dbg !18
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!6}
 !llvm.module.flags = !{!9, !10}
diff --git a/llvm/test/DebugInfo/X86/tu-to-non-named-type.ll b/llvm/test/DebugInfo/X86/tu-to-non-named-type.ll
index 02d9b92..0b2bf6b 100644
--- a/llvm/test/DebugInfo/X86/tu-to-non-named-type.ll
+++ b/llvm/test/DebugInfo/X86/tu-to-non-named-type.ll
@@ -42,7 +42,7 @@ entry:
 ; Function Attrs: nounwind readnone speculatable
 declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/DebugInfo/X86/void-typedef.ll b/llvm/test/DebugInfo/X86/void-typedef.ll
index 80f2467..c5d9c6c 100644
--- a/llvm/test/DebugInfo/X86/void-typedef.ll
+++ b/llvm/test/DebugInfo/X86/void-typedef.ll
@@ -52,7 +52,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 ; Function Attrs: noreturn nounwind
 declare void @llvm.trap() #2
 
-attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { noreturn nounwind }
 
diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s
index 3faea99..be4e0de 100644
--- a/llvm/test/MC/AMDGPU/literals.s
+++ b/llvm/test/MC/AMDGPU/literals.s
@@ -5,7 +5,8 @@
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX89,GFX9
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX11
 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX12XX,GFX12
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX12XX,GFX1250
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | FileCheck %s --check-prefixes=GFX8PLUS,GFX12XX,GFX1250,GFX1250-ASM
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding | FileCheck %s --check-prefixes=GFX8PLUS,GFX12XX,GFX1250,GFX1250-DIS
 
 // RUN: not llvm-mc -triple=amdgcn -mcpu=tahiti %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOSI --implicit-check-not=error:
 // RUN: not llvm-mc -triple=amdgcn -mcpu=bonaire %s -filetype=null 2>&1 | FileCheck %s --check-prefixes=NOGCN,NOSICI,NOCI --implicit-check-not=error:
@@ -197,7 +198,8 @@ v_fract_f64_e32 v[0:1], 1.0
 v_fract_f64_e32 v[0:1], lit(1.0)
 // GFX11: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f]
 // GFX12: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f]
-// GFX1250: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xfe,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f,0x00,0x00,0x00,0x00]
+// GFX1250-ASM: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xfe,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: v_fract_f64_e32 v[0:1], lit64(0x3ff00000) ; encoding: [0xfe,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f,0x00,0x00,0x00,0x00]
 // GFX89: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x64,0x00,0x7e,0x00,0x00,0xf0,0x3f]
 // SICI: v_fract_f64_e32 v[0:1], lit(0x3ff00000) ; encoding: [0xff,0x7c,0x00,0x7e,0x00,0x00,0xf0,0x3f]
 
@@ -229,15 +231,15 @@ v_cos_f16_e32 v5.l, lit(1.0)
 // NOGFX89: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
 // NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
-v_tanh_bf16 v5, 1.0
-// GFX1250: v_tanh_bf16_e32 v5, 1.0                 ; encoding: [0xf2,0x94,0x0a,0x7e]
+v_tanh_bf16 v5.l, 1.0
+// GFX1250: v_tanh_bf16_e32 v5.l, 1.0               ; encoding: [0xf2,0x94,0x0a,0x7e]
 // NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 // NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
 // NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
 // NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
-v_tanh_bf16 v5, lit(1.0)
-// GFX1250: v_tanh_bf16_e32 v5, lit(0x3f80)         ; encoding: [0xff,0x94,0x0a,0x7e,0x80,0x3f,0x00,0x00]
+v_tanh_bf16 v5.l, lit(1.0)
+// GFX1250: v_tanh_bf16_e32 v5.l, lit(0x3f80)       ; encoding: [0xff,0x94,0x0a,0x7e,0x80,0x3f,0x00,0x00]
 // NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 // NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
 // NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
@@ -291,11 +293,12 @@ v_cvt_pk_fp8_f16 v1.l, 1.0
 // NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
 v_cvt_pk_fp8_f16 v1.l, lit(1.0)
-// GFX1250: v_cvt_pk_fp8_f16 v1.l, lit(0x3c00)      ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x3c,0x00,0x00]
-// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// GFX1250-ASM: v_cvt_pk_fp8_f16 v1.l, lit(0x3c00)      ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x3c,0x00,0x00]
+// GFX1250-DIS: v_cvt_pk_fp8_f16 v1.l, 0x3c00           ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x00,0x3c,0x00,0x00]
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
 
 //---------------------------------------------------------------------------//
 // fp literal, expected int operand
@@ -430,10 +433,11 @@ v_and_b32_e32 v0, 2.3509886e-70, v1
 
 v_not_b16 v5.l, 1.0
 // GFX11: v_not_b16_e32 v5.l, 1.0                 ; encoding: [0xf2,0xd2,0x0a,0x7e]
-// GFX1250: v_not_b16_e32 v5.l, 1.0                 ; encoding: [0xf2,0xd2,0x0a,0x7e]
-// NOGFX12: :[[@LINE-3]]:1: error: operands are not valid for this GPU or mode
-// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// GFX1250-ASM: v_not_b16_e32 v5.l, 1.0                 ; encoding: [0xf2,0xd2,0x0a,0x7e]
+// GFX1250-DIS: v_not_b16_e32 v5.l, 0x3c00              ; encoding: [0xff,0xd2,0x0a,0x7e,0x00,0x3c,0x00,0x00]
+// NOGFX12: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
+// NOGFX89: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
 
 v_not_b16 v5.l, lit(1.0)
 // GFX11: v_not_b16_e32 v5.l, lit(0x3f800000)     ; encoding: [0xff,0xd2,0x0a,0x7e,0x00,0x00,0x80,0x3f]
@@ -501,7 +505,8 @@ v_fract_f64_e32 v[0:1], 1
 v_fract_f64_e32 v[0:1], lit(1)
 // GFX11: v_fract_f64_e32 v[0:1], lit(0x1)        ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00]
 // GFX12: v_fract_f64_e32 v[0:1], lit(0x1)        ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00]
-// GFX1250: v_fract_f64_e32 v[0:1], lit(0x1)        ; encoding: [0xfe,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX1250-ASM: v_fract_f64_e32 v[0:1], lit(0x1)        ; encoding: [0xfe,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: v_fract_f64_e32 v[0:1], lit64(0x1)      ; encoding: [0xfe,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 // GFX89: v_fract_f64_e32 v[0:1], lit(0x1)        ; encoding: [0xff,0x64,0x00,0x7e,0x01,0x00,0x00,0x00]
 // SICI: v_fract_f64_e32 v[0:1], lit(0x1)        ; encoding: [0xff,0x7c,0x00,0x7e,0x01,0x00,0x00,0x00]
 
@@ -678,15 +683,15 @@ v_cos_f16_e32 v5.l, lit(1)
 // NOGFX89: :[[@LINE-4]]:1: error: operands are not valid for this GPU or mode
 // NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
-v_tanh_bf16 v5, 1
-// GFX1250: v_tanh_bf16_e32 v5, 1                   ; encoding: [0x81,0x94,0x0a,0x7e]
+v_tanh_bf16 v5.l, 1
+// GFX1250: v_tanh_bf16_e32 v5.l, 1                 ; encoding: [0x81,0x94,0x0a,0x7e]
 // NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 // NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
 // NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
 // NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
-v_tanh_bf16 v5, lit(1)
-// GFX1250: v_tanh_bf16_e32 v5, lit(0x1)            ; encoding: [0xff,0x94,0x0a,0x7e,0x01,0x00,0x00,0x00]
+v_tanh_bf16 v5.l, lit(1)
+// GFX1250: v_tanh_bf16_e32 v5.l, lit(0x1)          ; encoding: [0xff,0x94,0x0a,0x7e,0x01,0x00,0x00,0x00]
 // NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 // NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
 // NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
@@ -740,11 +745,12 @@ v_cvt_pk_fp8_f16 v1.l, 1
 // NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
 
 v_cvt_pk_fp8_f16 v1.l, lit(1)
-// GFX1250: v_cvt_pk_fp8_f16 v1.l, lit(0x1)         ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
-// NOGFX11: :[[@LINE-2]]:1: error: instruction not supported on this GPU
-// NOGFX12: :[[@LINE-3]]:1: error: instruction not supported on this GPU
-// NOGFX89: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOSICI: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// GFX1250-ASM: v_cvt_pk_fp8_f16 v1.l, lit(0x1)         ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
+// GFX1250-DIS: v_cvt_pk_fp8_f16 v1.l, 1                ; encoding: [0x01,0x00,0x72,0xd7,0xff,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
+// NOGFX11: :[[@LINE-3]]:1: error: instruction not supported on this GPU
+// NOGFX12: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOGFX89: :[[@LINE-5]]:1: error: instruction not supported on this GPU
+// NOSICI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
 
 //---------------------------------------------------------------------------//
 // int literal, expected int operand
@@ -831,7 +837,8 @@ v_and_b32_e32 v0, -54321, v1
 s_mov_b64_e32 s[0:1], 0xdeadbeef
 // GFX11: s_mov_b64 s[0:1], 0xdeadbeef            ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde]
 // GFX12: s_mov_b64 s[0:1], 0xdeadbeef            ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde]
-// GFX1250: s_mov_b64 s[0:1], 0xdeadbeef            ; encoding: [0xfe,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde,0x00,0x00,0x00,0x00]
+// GFX1250-ASM: s_mov_b64 s[0:1], 0xdeadbeef            ; encoding: [0xfe,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: s_mov_b64 s[0:1], lit64(0xdeadbeef)     ; encoding: [0xfe,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde,0x00,0x00,0x00,0x00]
 // GFX89: s_mov_b64 s[0:1], 0xdeadbeef            ; encoding: [0xff,0x01,0x80,0xbe,0xef,0xbe,0xad,0xde]
 // SICI: s_mov_b64 s[0:1], 0xdeadbeef            ; encoding: [0xff,0x04,0x80,0xbe,0xef,0xbe,0xad,0xde]
 
@@ -844,7 +851,8 @@ v_and_b32_e32 v0, 0xdeadbeef, v1
 s_mov_b64_e32 s[0:1], 0xffffffff
 // GFX11: s_mov_b64 s[0:1], 0xffffffff            ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff]
 // GFX12: s_mov_b64 s[0:1], 0xffffffff            ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff]
-// GFX1250: s_mov_b64 s[0:1], 0xffffffff            ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
+// GFX1250-ASM: s_mov_b64 s[0:1], 0xffffffff            ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: s_mov_b64 s[0:1], lit64(0xffffffff)     ; encoding: [0xfe,0x01,0x80,0xbe,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00]
 // GFX89: s_mov_b64 s[0:1], 0xffffffff            ; encoding: [0xff,0x01,0x80,0xbe,0xff,0xff,0xff,0xff]
 // SICI: s_mov_b64 s[0:1], 0xffffffff            ; encoding: [0xff,0x04,0x80,0xbe,0xff,0xff,0xff,0xff]
 
@@ -896,7 +904,8 @@ s_mov_b64 s[0:1], 1
 s_mov_b64 s[0:1], lit(1)
 // GFX11: s_mov_b64 s[0:1], lit(0x1)              ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00]
 // GFX12: s_mov_b64 s[0:1], lit(0x1)              ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00]
-// GFX1250: s_mov_b64 s[0:1], lit(0x1)              ; encoding: [0xfe,0x01,0x80,0xbe,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX1250-ASM: s_mov_b64 s[0:1], lit(0x1)              ; encoding: [0xfe,0x01,0x80,0xbe,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: s_mov_b64 s[0:1], lit64(0x1)            ; encoding: [0xfe,0x01,0x80,0xbe,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 // GFX89: s_mov_b64 s[0:1], lit(0x1)              ; encoding: [0xff,0x01,0x80,0xbe,0x01,0x00,0x00,0x00]
 // SICI: s_mov_b64 s[0:1], lit(0x1)              ; encoding: [0xff,0x04,0x80,0xbe,0x01,0x00,0x00,0x00]
 
@@ -1107,7 +1116,9 @@ v_trunc_f64 v[0:1], 0x1fffffff000
 
 buffer_atomic_add v0, off, s[0:3], scc offset:4095
 // GFX11: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0xd4,0xe0,0x00,0x00,0x00,0xfd]
-// GFX12XX: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0x7d,0x40,0x0d,0xc4,0x00,0x00,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX12: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0x7d,0x40,0x0d,0xc4,0x00,0x00,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX1250-ASM: buffer_atomic_add_u32 v0, off, s[0:3], src_scc offset:4095 ; encoding: [0x7d,0x40,0x0d,0xc4,0x00,0x00,0x80,0x00,0x00,0xff,0x0f,0x00]
+// GFX1250-DIS: buffer_atomic_add_u32 v0, off, s[0:3], m0 offset:4095 ; encoding: [0x7d,0x40,0x0d,0xc4,0x00,0x00,0x80,0x00,0x00,0xff,0x0f,0x00]
 // GFX89: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0x08,0xe1,0x00,0x00,0x00,0xfd]
 // SICI: buffer_atomic_add v0, off, s[0:3], src_scc offset:4095 ; encoding: [0xff,0x0f,0xc8,0xe0,0x00,0x00,0x00,0xfd]
 
@@ -1241,9 +1252,11 @@ v_ceil_f16 v0, neg(vccz)
 
 v_ceil_f16 v0, abs(scc)
 // GFX11: v_ceil_f16_e64 v0, |src_scc|            ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
-// GFX12XX: v_ceil_f16_e64 v0, |src_scc|            ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
+// GFX12: v_ceil_f16_e64 v0, |src_scc|            ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
+// GFX1250-ASM: v_ceil_f16_e64 v0, |src_scc|            ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
+// GFX1250-DIS: v_ceil_f16_e64 v0.l, |src_scc|          ; encoding: [0x00,0x01,0xdc,0xd5,0xfd,0x00,0x00,0x00]
 // GFX89: v_ceil_f16_e64 v0, |src_scc|            ; encoding: [0x00,0x01,0x85,0xd1,0xfd,0x00,0x00,0x00]
-// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
+// NOSICI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
 // NOSICIVI: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
 v_ceil_f64 v[5:6], |execz|
@@ -1461,10 +1474,12 @@ v_cmp_eq_i64 vcc, src_shared_base, v[0:1]
 
 v_max_f16 v0, src_shared_base, v0
 // GFX11: v_max_f16_e32 v0, src_shared_base, v0   ; encoding: [0xeb,0x00,0x00,0x72]
-// GFX12XX: v_max_num_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x62]
+// GFX12: v_max_num_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x62]
+// GFX1250-ASM: v_max_num_f16_e32 v0, src_shared_base, v0 ; encoding: [0xeb,0x00,0x00,0x62]
+// GFX1250-DIS: v_max_num_f16_e32 v0.l, src_shared_base, v0.l ; encoding: [0xeb,0x00,0x00,0x62]
 // GFX9: v_max_f16_e32 v0, src_shared_base, v0   ; encoding: [0xeb,0x00,0x00,0x5a]
-// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOVI: :[[@LINE-5]]:15: error: src_shared_base register not available on this GPU
+// NOSICI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-7]]:15: error: src_shared_base register not available on this GPU
 // NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
 v_max_f32 v0, src_shared_base, v0
@@ -1493,18 +1508,22 @@ v_pk_add_f16 v0, src_shared_base, v0
 
 v_ceil_f16 v0, neg(src_shared_base)
 // GFX11: v_ceil_f16_e64 v0, -src_shared_base     ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
-// GFX12XX: v_ceil_f16_e64 v0, -src_shared_base     ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
+// GFX12: v_ceil_f16_e64 v0, -src_shared_base     ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
+// GFX1250-ASM: v_ceil_f16_e64 v0, -src_shared_base     ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
+// GFX1250-DIS: v_ceil_f16_e64 v0.l, -src_shared_base   ; encoding: [0x00,0x00,0xdc,0xd5,0xeb,0x00,0x00,0x20]
 // GFX9: v_ceil_f16_e64 v0, -src_shared_base     ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20]
-// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOVI: :[[@LINE-5]]:20: error: src_shared_base register not available on this GPU
+// NOSICI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-7]]:20: error: src_shared_base register not available on this GPU
 // NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
 v_ceil_f16 v0, abs(src_shared_base)
 // GFX11: v_ceil_f16_e64 v0, |src_shared_base|    ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
-// GFX12XX: v_ceil_f16_e64 v0, |src_shared_base|    ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
+// GFX12: v_ceil_f16_e64 v0, |src_shared_base|    ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
+// GFX1250-ASM: v_ceil_f16_e64 v0, |src_shared_base|    ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
+// GFX1250-DIS: v_ceil_f16_e64 v0.l, |src_shared_base|  ; encoding: [0x00,0x01,0xdc,0xd5,0xeb,0x00,0x00,0x00]
 // GFX9: v_ceil_f16_e64 v0, |src_shared_base|    ; encoding: [0x00,0x01,0x85,0xd1,0xeb,0x00,0x00,0x00]
-// NOSICI: :[[@LINE-4]]:1: error: instruction not supported on this GPU
-// NOVI: :[[@LINE-5]]:20: error: src_shared_base register not available on this GPU
+// NOSICI: :[[@LINE-6]]:1: error: instruction not supported on this GPU
+// NOVI: :[[@LINE-7]]:20: error: src_shared_base register not available on this GPU
 // NOSICIVI: :[[@LINE-1]]:1: error: instruction not supported on this GPU
 
 v_ceil_f64 v[5:6], |src_shared_base|
@@ -1792,33 +1811,41 @@ v_pk_add_f16 v255, vccz, execz
 
 v_sqrt_f32 v2, lit(123)
 // GFX11: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX12XX: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX12: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX1250-ASM: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX1250-DIS: v_sqrt_f32_e32 v2, 0x7b                 ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
 // GFX89: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
 // SICI: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
 
 v_sqrt_f32 v2, abs(lit(123))
 // GFX11: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX12XX: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX12: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX1250-ASM: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
+// GFX1250-DIS: v_sqrt_f32_e32 v2, 0x7b                 ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
 // GFX89: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x4e,0x04,0x7e,0x7b,0x00,0x00,0x00]
 // SICI: v_sqrt_f32_e32 v2, lit(0x7b)            ; encoding: [0xff,0x66,0x04,0x7e,0x7b,0x00,0x00,0x00]
 
 v_sqrt_f32 v2, lit(123.0)
 // GFX11: v_sqrt_f32_e32 v2, lit(0x42f60000)      ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
-// GFX12XX: v_sqrt_f32_e32 v2, lit(0x42f60000)      ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// GFX12: v_sqrt_f32_e32 v2, lit(0x42f60000)      ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// GFX1250-ASM: v_sqrt_f32_e32 v2, lit(0x42f60000)      ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
+// GFX1250-DIS: v_sqrt_f32_e32 v2, 0x42f60000           ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
 // GFX89: v_sqrt_f32_e32 v2, lit(0x42f60000)      ; encoding: [0xff,0x4e,0x04,0x7e,0x00,0x00,0xf6,0x42]
 // SICI: v_sqrt_f32_e32 v2, lit(0x42f60000)      ; encoding: [0xff,0x66,0x04,0x7e,0x00,0x00,0xf6,0x42]
 
 v_sqrt_f64 v[2:3], lit(123.0)
 // GFX11: v_sqrt_f64_e32 v[2:3], lit(0x405ec000)  ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
 // GFX12: v_sqrt_f64_e32 v[2:3], lit(0x405ec000)  ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
-// GFX1250: v_sqrt_f64_e32 v[2:3], lit(0x405ec000)  ; encoding: [0xfe,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40,0x00,0x00,0x00,0x00]
+// GFX1250-ASM: v_sqrt_f64_e32 v[2:3], lit(0x405ec000)  ; encoding: [0xfe,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: v_sqrt_f64_e32 v[2:3], lit64(0x405ec000) ; encoding: [0xfe,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40,0x00,0x00,0x00,0x00]
 // GFX89: v_sqrt_f64_e32 v[2:3], lit(0x405ec000)  ; encoding: [0xff,0x50,0x04,0x7e,0x00,0xc0,0x5e,0x40]
 // SICI: v_sqrt_f64_e32 v[2:3], lit(0x405ec000)  ; encoding: [0xff,0x68,0x04,0x7e,0x00,0xc0,0x5e,0x40]
 
 v_sqrt_f64 v[2:3], lit(123)
 // GFX11: v_sqrt_f64_e32 v[2:3], lit(0x7b)        ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
 // GFX12: v_sqrt_f64_e32 v[2:3], lit(0x7b)        ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
-// GFX1250: v_sqrt_f64_e32 v[2:3], lit(0x7b)        ; encoding: [0xfe,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX1250-ASM: v_sqrt_f64_e32 v[2:3], lit(0x7b)        ; encoding: [0xfe,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+// GFX1250-DIS: v_sqrt_f64_e32 v[2:3], lit64(0x7b)      ; encoding: [0xfe,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 // GFX89: v_sqrt_f64_e32 v[2:3], lit(0x7b)        ; encoding: [0xff,0x50,0x04,0x7e,0x7b,0x00,0x00,0x00]
 // SICI: v_sqrt_f64_e32 v[2:3], lit(0x7b)        ; encoding: [0xff,0x68,0x04,0x7e,0x7b,0x00,0x00,0x00]
 
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt
index 1f5df65..72c800f 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt
@@ -111,9 +111,6 @@
 # CHECK: tlbie 4
 0x7c 0x00 0x22 0x64
 
-# CHECK: tlbie 4
-0x7c 0x00 0x22 0x64
-
 # CHECK: rfi
 0x4c 0x00 0x00 0x64
 # CHECK: rfci
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-p9vector.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-p9.txt
index 1a79648..a857168 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-p9vector.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-p9.txt
@@ -2,3 +2,9 @@
 
 # CHECK: mtvsrdd 6, 0, 3
 0x66 0x1b 0xc0 0x7c
+
+# CHECK: tlbie 8, 10
+0x64, 0x42, 0x40, 0x7d
+
+# CHECK: tlbie 8, 10, 2, 1, 0
+0x64, 0x42, 0x4a, 0x7d
diff --git a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
index f19cca8..a7434a2 100644
--- a/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
+++ b/llvm/test/Transforms/InstCombine/ptrtoaddr.ll
@@ -4,6 +4,10 @@
 ; The ptrtoaddr folds are also valid for pointers that have external state.
 target datalayout = "pe1:64:64:64:32"
 
+declare void @use.i1(i1)
+declare void @use.i32(i32)
+declare void @use.i64(i64)
+
 ; ptrtoaddr result type is fixed, and can't be combined with integer cast.
 define i32 @ptrtoaddr_trunc(ptr %p) {
 ; CHECK-LABEL: define i32 @ptrtoaddr_trunc(
@@ -171,3 +175,65 @@ define i128 @sub_zext_ptrtoint_ptrtoaddr_addrsize(ptr addrspace(1) %p, i32 %offs
   %sub = sub i128 %p2.addr.ext, %p.int.ext
   ret i128 %sub
 }
+
+; The uses in icmp, ptrtoint, ptrtoaddr should be replaced. The one in the
+; return value should not, as the provenance differs.
+define ptr @gep_sub_ptrtoaddr_different_obj(ptr %p, ptr %p2, ptr %p3) {
+; CHECK-LABEL: define ptr @gep_sub_ptrtoaddr_different_obj(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[P2:%.*]], ptr [[P3:%.*]]) {
+; CHECK-NEXT:    [[P_ADDR:%.*]] = ptrtoaddr ptr [[P]] to i64
+; CHECK-NEXT:    [[P2_ADDR:%.*]] = ptrtoaddr ptr [[P2]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[P2_ADDR]], [[P_ADDR]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[SUB]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[P2]], [[P3]]
+; CHECK-NEXT:    call void @use.i1(i1 [[CMP]])
+; CHECK-NEXT:    [[INT:%.*]] = ptrtoint ptr [[P2]] to i64
+; CHECK-NEXT:    call void @use.i64(i64 [[INT]])
+; CHECK-NEXT:    [[ADDR:%.*]] = ptrtoaddr ptr [[P2]] to i64
+; CHECK-NEXT:    call void @use.i64(i64 [[ADDR]])
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+  %p.addr = ptrtoaddr ptr %p to i64
+  %p2.addr = ptrtoaddr ptr %p2 to i64
+  %sub = sub i64 %p2.addr, %p.addr
+  %gep = getelementptr i8, ptr %p, i64 %sub
+  %cmp = icmp eq ptr %gep, %p3
+  call void @use.i1(i1 %cmp)
+  %int = ptrtoint ptr %gep to i64
+  call void @use.i64(i64 %int)
+  %addr = ptrtoaddr ptr %gep to i64
+  call void @use.i64(i64 %addr)
+  ret ptr %gep
+}
+
+; The use in ptrtoaddr should be replaced. The uses in ptrtoint and icmp should
+; not be replaced, as the non-address bits differ. The use in the return value
+; should not be replaced as the provenace differs.
+define ptr addrspace(1) @gep_sub_ptrtoaddr_different_obj_addrsize(ptr addrspace(1) %p, ptr addrspace(1) %p2, ptr addrspace(1) %p3) {
+; CHECK-LABEL: define ptr addrspace(1) @gep_sub_ptrtoaddr_different_obj_addrsize(
+; CHECK-SAME: ptr addrspace(1) [[P:%.*]], ptr addrspace(1) [[P2:%.*]], ptr addrspace(1) [[P3:%.*]]) {
+; CHECK-NEXT:    [[P_ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P]] to i32
+; CHECK-NEXT:    [[P2_ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P2]] to i32
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[P2_ADDR]], [[P_ADDR]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr addrspace(1) [[P]], i32 [[SUB]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr addrspace(1) [[GEP]], [[P3]]
+; CHECK-NEXT:    call void @use.i1(i1 [[CMP]])
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP]] to i64
+; CHECK-NEXT:    [[INT:%.*]] = trunc i64 [[TMP1]] to i32
+; CHECK-NEXT:    call void @use.i32(i32 [[INT]])
+; CHECK-NEXT:    [[ADDR:%.*]] = ptrtoaddr ptr addrspace(1) [[P2]] to i32
+; CHECK-NEXT:    call void @use.i32(i32 [[ADDR]])
+; CHECK-NEXT:    ret ptr addrspace(1) [[GEP]]
+;
+  %p.addr = ptrtoaddr ptr addrspace(1) %p to i32
+  %p2.addr = ptrtoaddr ptr addrspace(1) %p2 to i32
+  %sub = sub i32 %p2.addr, %p.addr
+  %gep = getelementptr i8, ptr addrspace(1) %p, i32 %sub
+  %cmp = icmp eq ptr addrspace(1) %gep, %p3
+  call void @use.i1(i1 %cmp)
+  %int = ptrtoint ptr addrspace(1) %gep to i32
+  call void @use.i32(i32 %int)
+  %addr = ptrtoaddr ptr addrspace(1) %gep to i32
+  call void @use.i32(i32 %addr)
+  ret ptr addrspace(1) %gep
+}
diff --git a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
index 7bf4fbd..91b3099 100644
--- a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
@@ -1,6 +1,5 @@
-; RUN: opt -passes=loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 < %s -S | FileCheck %s
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6
+; RUN: opt -passes=loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s
 
 ; Check that we version this loop with speculating the value 1 for symbolic
 ; strides.  This also checks that the symbolic stride information is correctly
@@ -8,50 +7,60 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; vectorize because we couldn't determine the array bounds for the required
 ; memchecks.
 
-; CHECK-LABEL: test
-define void @test(ptr  %A, i64 %AStride,
-                  ptr  %B, i32 %BStride,
-                  ptr  %C, i64 %CStride, i32 %N) {
+define void @test(ptr noalias %A, i64 %AStride, ptr noalias %B, i32 %BStride, ptr noalias %C, i64 %CStride) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr noalias [[A:%.*]], i64 [[ASTRIDE:%.*]], ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]], ptr noalias [[C:%.*]], i64 [[CSTRIDE:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK:       [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT:    [[IDENT_CHECK1:%.*]] = icmp ne i32 [[BSTRIDE]], 1
+; CHECK-NEXT:    [[IDENT_CHECK2:%.*]] = icmp ne i64 [[CSTRIDE]], 1
+; CHECK-NEXT:    [[DIFF_CHECK6:%.*]] = icmp ne i64 [[ASTRIDE]], 1
+; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = or i1 [[IDENT_CHECK1]], [[IDENT_CHECK2]]
+; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK6]]
+; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDEX]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP5]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = mul nsw <2 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br [[EXIT:label %.*]]
+; CHECK:       [[SCALAR_PH]]:
+;
 entry:
-  %cmp13 = icmp eq i32 %N, 0
-  br i1 %cmp13, label %for.end, label %for.body.preheader
-
-; CHECK-DAG: icmp ne i64 %AStride, 1
-; CHECK-DAG: icmp ne i32 %BStride, 1
-; CHECK-DAG: icmp ne i64 %CStride, 1
-; CHECK: or
-; CHECK: or
-; CHECK: br
-
-; CHECK: vector.body
-; CHECK: load <2 x i32>
+  br label %loop
 
-for.body.preheader:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %iv.trunc = trunc i64 %indvars.iv to i32
+loop:
+  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+  %iv.trunc = trunc i64 %iv to i32
   %mul = mul i32 %iv.trunc, %BStride
   %mul64 = zext i32 %mul to i64
-  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %mul64
-  %0 = load i32, ptr %arrayidx, align 4
-  %mul2 = mul nsw i64 %indvars.iv, %CStride
-  %arrayidx3 = getelementptr inbounds i32, ptr %C, i64 %mul2
-  %1 = load i32, ptr %arrayidx3, align 4
+  %gep.x = getelementptr inbounds i32, ptr %B, i64 %mul64
+  %0 = load i32, ptr %gep.x, align 4
+  %mul2 = mul nsw i64 %iv, %CStride
+  %gep.c = getelementptr inbounds i32, ptr %C, i64 %mul2
+  %1 = load i32, ptr %gep.c, align 4
   %mul4 = mul nsw i32 %1, %0
-  %mul3 = mul nsw i64 %indvars.iv, %AStride
-  %arrayidx7 = getelementptr inbounds i32, ptr %A, i64 %mul3
-  store i32 %mul4, ptr %arrayidx7, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %N
-  br i1 %exitcond, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
-  br label %for.end
-
-for.end:
+  %mul3 = mul nsw i64 %iv, %AStride
+  %gep.a = getelementptr inbounds i32, ptr %A, i64 %mul3
+  store i32 %mul4, ptr %gep.a, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, 100
+  br i1 %ec, label %exit, label %loop
+
+exit:
   ret void
 }
 
@@ -59,36 +68,150 @@ for.end:
 ; replacing the symbolic stride '%conv'.
 ; PR18480
 
-; CHECK-LABEL: fn1
-; CHECK: load <2 x double>
-
 define void @fn1(ptr noalias %x, ptr noalias %c, double %a) {
+; CHECK-LABEL: define void @fn1(
+; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[C:%.*]], double [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[A]] to i32
+; CHECK-NEXT:    [[CONV2:%.*]] = add i32 [[CONV]], 4
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[CONV2]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[LOOP_PREHEADER:.*]], [[EXIT:label %.*]]
+; CHECK:       [[LOOP_PREHEADER]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[CONV2]] to i64
+; CHECK-NEXT:    br label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK:       [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[CONV]], 1
+; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP2]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT:    store <2 x double> [[WIDE_LOAD]], ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+;
 entry:
   %conv = fptosi double %a to i32
   %conv2 = add i32 %conv, 4
   %cmp8 = icmp sgt i32 %conv2, 0
-  br i1 %cmp8, label %for.body.preheader, label %for.end
+  br i1 %cmp8, label %loop, label %exit
 
-for.body.preheader:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %0 = trunc i64 %indvars.iv to i32
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %0 = trunc i64 %iv to i32
   %mul = mul nsw i32 %0, %conv
-  %idxprom = sext i32 %mul to i64
-  %arrayidx = getelementptr inbounds double, ptr %x, i64 %idxprom
-  %1 = load double, ptr %arrayidx, align 8
-  %arrayidx3 = getelementptr inbounds double, ptr %c, i64 %indvars.iv
-  store double %1, ptr %arrayidx3, align 8
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %conv2
-  br i1 %exitcond, label %for.end.loopexit, label %for.body
+  %mul.ext = sext i32 %mul to i64
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %mul.ext
+  %1 = load double, ptr %gep.x, align 8
+  %gep.c = getelementptr inbounds double, ptr %c, i64 %iv
+  store double %1, ptr %gep.c, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %iv.trunc = trunc i64 %iv.next to i32
+  %ec = icmp eq i32 %iv.trunc, %conv2
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; Make sure we do not crash when the stride is poison.
+; Test for https://github.com/llvm/llvm-project/issues/162922.
+define void @stride_poison(ptr %dst) mustprogress {
+; CHECK-LABEL: define void @stride_poison(
+; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 poison, i64 1)
+; CHECK-NEXT:    [[TMP0:%.*]] = udiv i64 99, [[UMAX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 2
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 2
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[N_VEC]], poison
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], poison
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], poison
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], poison
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]]
+; CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+; CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH:.*]]
+; CHECK:       [[SCALAR_PH]]:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.dst = getelementptr i8, ptr %dst, i64 %iv
+  store i8 0, ptr %gep.dst, align 1
+  %iv.next = add nuw nsw i64 %iv, poison
+  %ec = icmp samesign ult i64 %iv, 100
+  br i1 %ec, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; Make sure we do not crash when the stride is undef.
+define void @stride_undef(ptr %dst) mustprogress {
+; CHECK-LABEL: define void @stride_undef(
+; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 undef, i64 1)
+; CHECK-NEXT:    [[TMP0:%.*]] = udiv i64 99, [[UMAX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 2
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 2
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[N_VEC]], undef
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], undef
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], undef
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]]
+; CHECK-NEXT:    store i8 0, ptr [[TMP5]], align 1
+; CHECK-NEXT:    store i8 0, ptr [[TMP6]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH:.*]]
+; CHECK:       [[SCALAR_PH]]:
+;
+entry:
+  br label %loop
 
-for.end.loopexit:
-  br label %for.end
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.dst = getelementptr i8, ptr %dst, i64 %iv
+  store i8 0, ptr %gep.dst, align 1
+  %iv.next = add nuw nsw i64 %iv, undef
+  %ec = icmp samesign ult i64 %iv, 100
+  br i1 %ec, label %loop, label %exit
 
-for.end:
+exit:
   ret void
 }
diff --git a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
index c2e632d..b1cf0e4 100644
--- a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
@@ -61,30 +61,49 @@ return:
   ret i32 %p
 }
 
-; Check that switch's of powers of two range is not reduced if default case is reachable
+; Check that switch's of powers of two range with the default case reachable is reduced
+; w/ Zbb enabled, by jumping non-power-of-two inputs to the default block.
 define i32 @switch_of_powers_reachable_default(i32 %x) {
-; CHECK-LABEL: @switch_of_powers_reachable_default(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    switch i32 [[X:%.*]], label [[RETURN:%.*]] [
-; CHECK-NEXT:      i32 1, label [[BB1:%.*]]
-; CHECK-NEXT:      i32 8, label [[BB2:%.*]]
-; CHECK-NEXT:      i32 16, label [[BB3:%.*]]
-; CHECK-NEXT:      i32 32, label [[BB4:%.*]]
-; CHECK-NEXT:      i32 64, label [[BB5:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       bb1:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       bb2:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       bb3:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       bb4:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       bb5:
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[P]]
+; RV64I-LABEL: @switch_of_powers_reachable_default(
+; RV64I-NEXT:  entry:
+; RV64I-NEXT:    switch i32 [[X:%.*]], label [[RETURN:%.*]] [
+; RV64I-NEXT:      i32 1, label [[BB1:%.*]]
+; RV64I-NEXT:      i32 8, label [[BB2:%.*]]
+; RV64I-NEXT:      i32 16, label [[BB3:%.*]]
+; RV64I-NEXT:      i32 32, label [[BB4:%.*]]
+; RV64I-NEXT:      i32 64, label [[BB5:%.*]]
+; RV64I-NEXT:    ]
+; RV64I:       bb1:
+; RV64I-NEXT:    br label [[RETURN]]
+; RV64I:       bb2:
+; RV64I-NEXT:    br label [[RETURN]]
+; RV64I:       bb3:
+; RV64I-NEXT:    br label [[RETURN]]
+; RV64I:       bb4:
+; RV64I-NEXT:    br label [[RETURN]]
+; RV64I:       bb5:
+; RV64I-NEXT:    br label [[RETURN]]
+; RV64I:       return:
+; RV64I-NEXT:    [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ]
+; RV64I-NEXT:    ret i32 [[P]]
+;
+; RV64ZBB-LABEL: @switch_of_powers_reachable_default(
+; RV64ZBB-NEXT:  entry:
+; RV64ZBB-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]])
+; RV64ZBB-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
+; RV64ZBB-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[RETURN:%.*]]
+; RV64ZBB:       entry.split:
+; RV64ZBB-NEXT:    [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; RV64ZBB-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
+; RV64ZBB-NEXT:    br i1 [[TMP3]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN]]
+; RV64ZBB:       switch.lookup:
+; RV64ZBB-NEXT:    [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
+; RV64ZBB-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_reachable_default, i64 0, i64 [[TMP4]]
+; RV64ZBB-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; RV64ZBB-NEXT:    br label [[RETURN]]
+; RV64ZBB:       return:
+; RV64ZBB-NEXT:    [[P:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ -1, [[ENTRY_SPLIT]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ]
+; RV64ZBB-NEXT:    ret i32 [[P]]
 ;
 entry:
   switch i32 %x, label %default_case [
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
index 49eb199..aa95b3f 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
@@ -34,3 +34,97 @@ return:
   %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ]
   ret i32 %phi
 }
+
+define i32 @switch_of_powers_two_default_reachable(i32 %arg) {
+; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]]
+; CHECK:       [[ENTRY_SPLIT]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]]
+; CHECK:       [[SWITCH_LOOKUP]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable, i64 0, i64 [[TMP4]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ 5, %[[ENTRY_SPLIT]] ], [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ]
+; CHECK-NEXT:    ret i32 [[PHI]]
+;
+entry:
+  switch i32 %arg, label %default_case [
+  i32 1,  label %bb1
+  i32 8,  label %bb2
+  i32 16, label %bb3
+  i32 32, label %bb4
+  i32 64, label %bb5
+  ]
+
+default_case: br label %return
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+
+return:
+  %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ 5, %default_case ]
+  ret i32 %phi
+}
+
+define i32 @switch_of_powers_two_default_reachable_multipreds(i32 %arg, i1 %cond) {
+; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable_multipreds(
+; CHECK-SAME: i32 [[ARG:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 [[COND]], label %[[SWITCH:.*]], label %[[RETURN:.*]]
+; CHECK:       [[SWITCH]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[SWITCH_SPLIT:.*]], label %[[RETURN]]
+; CHECK:       [[SWITCH_SPLIT]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
+; CHECK-NEXT:    [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[TMP2]] to i8
+; CHECK-NEXT:    [[SWITCH_SHIFTED:%.*]] = lshr i8 121, [[SWITCH_MASKINDEX]]
+; CHECK-NEXT:    [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
+; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[TMP3]], i1 [[SWITCH_LOBIT]], i1 false
+; CHECK-NEXT:    br i1 [[OR_COND]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]]
+; CHECK:       [[SWITCH_LOOKUP]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable_multipreds, i64 0, i64 [[TMP4]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ARG]], %[[SWITCH_SPLIT]] ], [ [[ARG]], %[[SWITCH]] ], [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ]
+; CHECK-NEXT:    ret i32 [[PHI]]
+;
+entry:
+  br i1 %cond, label %switch, label %default_case
+
+switch:
+  switch i32 %arg, label %default_case [
+  i32 1,  label %bb1
+  i32 8,  label %bb2
+  i32 16, label %bb3
+  i32 32, label %bb4
+  i32 64, label %bb5
+  ]
+
+default_case:
+  %pn = phi i32 [ 0, %entry ], [ %arg, %switch ]
+  br label %return
+
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+
+return:
+  %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ %pn, %default_case ]
+  ret i32 %phi
+}